LLVM 19.0.0git
DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of loads sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG. For fast lookup and
174 /// deduplication, the index of the node in this vector is stored in the
175 /// node in SDNode::CombinerWorklistIndex.
176 SmallVector<SDNode *, 64> Worklist;
177
178 /// This records all nodes attempted to be added to the worklist since we
179 /// last considered a new worklist entry. Because we do not add duplicate
180 /// nodes to the worklist, this is different from the tail of the worklist.
181 SmallSetVector<SDNode *, 32> PruningList;
182
183 /// Map from candidate StoreNode to the pair of RootNode and count.
184 /// The count is used to track how many times we have seen the StoreNode
185 /// with the same RootNode bail out in dependence check. If we have seen
186 /// the bail out for the same pair many times over a limit, we won't
187 /// consider the StoreNode with the same RootNode as store merging
188 /// candidate again.
189 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
190
191 // AA - Used for DAG load/store alias analysis.
192 AliasAnalysis *AA;
193
194 /// When an instruction is simplified, add all users of the instruction to
195 /// the work lists because they might get more simplified now.
196 void AddUsersToWorklist(SDNode *N) {
197 for (SDNode *Node : N->uses())
198 AddToWorklist(Node);
199 }
200
201 /// Convenient shorthand to add a node and all of its users to the worklist.
202 void AddToWorklistWithUsers(SDNode *N) {
203 AddUsersToWorklist(N);
204 AddToWorklist(N);
205 }
206
207 // Prune potentially dangling nodes. This is called after
208 // any visit to a node, but should also be called during a visit after any
209 // failed combine which may have created a DAG node.
210 void clearAddedDanglingWorklistEntries() {
211 // Check any nodes added to the worklist to see if they are prunable.
212 while (!PruningList.empty()) {
213 auto *N = PruningList.pop_back_val();
214 if (N->use_empty())
215 recursivelyDeleteUnusedNodes(N);
216 }
217 }
218
219 SDNode *getNextWorklistEntry() {
220 // Before we do any work, remove nodes that are not in use.
221 clearAddedDanglingWorklistEntries();
222 SDNode *N = nullptr;
223 // The Worklist holds the SDNodes in order, but it may contain null
224 // entries.
225 while (!N && !Worklist.empty()) {
226 N = Worklist.pop_back_val();
227 }
228
229 if (N) {
230 assert(N->getCombinerWorklistIndex() >= 0 &&
231 "Found a worklist entry without a corresponding map entry!");
232 // Set to -2 to indicate that we combined the node.
233 N->setCombinerWorklistIndex(-2);
234 }
235 return N;
236 }
237
238 /// Call the node-specific routine that folds each particular type of node.
239 SDValue visit(SDNode *N);
240
241 public:
242 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
243 : DAG(D), TLI(D.getTargetLoweringInfo()),
244 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
245 ForCodeSize = DAG.shouldOptForSize();
246 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
247
248 MaximumLegalStoreInBits = 0;
249 // We use the minimum store size here, since that's all we can guarantee
250 // for the scalable vector types.
251 for (MVT VT : MVT::all_valuetypes())
252 if (EVT(VT).isSimple() && VT != MVT::Other &&
253 TLI.isTypeLegal(EVT(VT)) &&
254 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
255 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
256 }
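 // Illustrative example: on a target whose widest legal type is v4i32, the
 // loop above leaves MaximumLegalStoreInBits at 128; the store-merging
 // helpers declared later in this class consult that bound when choosing a
 // merged store type.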
257
258 void ConsiderForPruning(SDNode *N) {
259 // Mark this for potential pruning.
260 PruningList.insert(N);
261 }
262
263 /// Add to the worklist making sure its instance is at the back (next to be
264 /// processed).
265 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
266 bool SkipIfCombinedBefore = false) {
267 assert(N->getOpcode() != ISD::DELETED_NODE &&
268 "Deleted Node added to Worklist");
269
270 // Skip handle nodes as they can't usefully be combined and confuse the
271 // zero-use deletion strategy.
272 if (N->getOpcode() == ISD::HANDLENODE)
273 return;
274
275 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
276 return;
277
278 if (IsCandidateForPruning)
279 ConsiderForPruning(N);
280
281 if (N->getCombinerWorklistIndex() < 0) {
282 N->setCombinerWorklistIndex(Worklist.size());
283 Worklist.push_back(N);
284 }
285 }
286
287 /// Remove all instances of N from the worklist.
288 void removeFromWorklist(SDNode *N) {
289 PruningList.remove(N);
290 StoreRootCountMap.erase(N);
291
292 int WorklistIndex = N->getCombinerWorklistIndex();
293 // If not in the worklist, the index might be -1 or -2 (was combined
294 // before). As the node gets deleted anyway, there's no need to update
295 // the index.
296 if (WorklistIndex < 0)
297 return; // Not in the worklist.
298
299 // Null out the entry rather than erasing it to avoid a linear operation.
300 Worklist[WorklistIndex] = nullptr;
301 N->setCombinerWorklistIndex(-1);
302 }
303
304 void deleteAndRecombine(SDNode *N);
305 bool recursivelyDeleteUnusedNodes(SDNode *N);
306
307 /// Replaces all uses of the results of one DAG node with new values.
308 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
309 bool AddTo = true);
310
311 /// Replaces all uses of the results of one DAG node with new values.
312 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
313 return CombineTo(N, &Res, 1, AddTo);
314 }
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
318 bool AddTo = true) {
319 SDValue To[] = { Res0, Res1 };
320 return CombineTo(N, To, 2, AddTo);
321 }
322
323 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
324
325 private:
326 unsigned MaximumLegalStoreInBits;
327
328 /// Check the specified integer node value to see if it can be simplified or
329 /// if things it uses can be simplified by bit propagation.
330 /// If so, return true.
331 bool SimplifyDemandedBits(SDValue Op) {
332 unsigned BitWidth = Op.getScalarValueSizeInBits();
333 APInt DemandedBits = APInt::getAllOnes(BitWidth);
334 return SimplifyDemandedBits(Op, DemandedBits);
335 }
336
337 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
338 EVT VT = Op.getValueType();
339 APInt DemandedElts = VT.isFixedLengthVector()
340 ? APInt::getAllOnes(VT.getVectorNumElements())
341 : APInt(1, 1);
342 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
343 }
344
345 /// Check the specified vector node value to see if it can be simplified or
346 /// if things it uses can be simplified as it only uses some of the
347 /// elements. If so, return true.
348 bool SimplifyDemandedVectorElts(SDValue Op) {
349 // TODO: For now just pretend it cannot be simplified.
350 if (Op.getValueType().isScalableVector())
351 return false;
352
353 unsigned NumElts = Op.getValueType().getVectorNumElements();
354 APInt DemandedElts = APInt::getAllOnes(NumElts);
355 return SimplifyDemandedVectorElts(Op, DemandedElts);
356 }
357
358 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
359 const APInt &DemandedElts,
360 bool AssumeSingleUse = false);
361 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
362 bool AssumeSingleUse = false);
363
364 bool CombineToPreIndexedLoadStore(SDNode *N);
365 bool CombineToPostIndexedLoadStore(SDNode *N);
366 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
367 bool SliceUpLoad(SDNode *N);
368
369 // Looks up the chain to find a unique (unaliased) store feeding the passed
370 // load. If no such store is found, returns a nullptr.
371 // Note: This will look past a CALLSEQ_START if the load is chained to it
372 // so that it can find stack stores for byval params.
373 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
374 // Scalars have size 0 to distinguish from singleton vectors.
375 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
376 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
377 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
378
379 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
380 /// load.
381 ///
382 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
383 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
384 /// \param EltNo index of the vector element to load.
385 /// \param OriginalLoad load that EVE came from to be replaced.
386 /// \returns EVE on success, SDValue() on failure.
387 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
388 SDValue EltNo,
389 LoadSDNode *OriginalLoad);
390 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
391 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
392 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
393 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
394 SDValue PromoteIntBinOp(SDValue Op);
395 SDValue PromoteIntShiftOp(SDValue Op);
396 SDValue PromoteExtend(SDValue Op);
397 bool PromoteLoad(SDValue Op);
398
399 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
400 SDValue RHS, SDValue True, SDValue False,
401 ISD::CondCode CC);
402
403 /// Call the node-specific routine that knows how to fold each
404 /// particular type of node. If that doesn't do anything, try the
405 /// target-specific DAG combines.
406 SDValue combine(SDNode *N);
407
408 // Visitation implementation - Implement dag node combining for different
409 // node types. The semantics are as follows:
410 // Return Value:
411 // SDValue.getNode() == 0 - No change was made
412 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
413 // otherwise - N should be replaced by the returned Operand.
414 //
415 SDValue visitTokenFactor(SDNode *N);
416 SDValue visitMERGE_VALUES(SDNode *N);
417 SDValue visitADD(SDNode *N);
418 SDValue visitADDLike(SDNode *N);
419 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
420 SDValue visitSUB(SDNode *N);
421 SDValue visitADDSAT(SDNode *N);
422 SDValue visitSUBSAT(SDNode *N);
423 SDValue visitADDC(SDNode *N);
424 SDValue visitADDO(SDNode *N);
425 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
426 SDValue visitSUBC(SDNode *N);
427 SDValue visitSUBO(SDNode *N);
428 SDValue visitADDE(SDNode *N);
429 SDValue visitUADDO_CARRY(SDNode *N);
430 SDValue visitSADDO_CARRY(SDNode *N);
431 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
432 SDNode *N);
433 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
434 SDNode *N);
435 SDValue visitSUBE(SDNode *N);
436 SDValue visitUSUBO_CARRY(SDNode *N);
437 SDValue visitSSUBO_CARRY(SDNode *N);
438 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
439 SDValue visitMULFIX(SDNode *N);
440 SDValue useDivRem(SDNode *N);
441 SDValue visitSDIV(SDNode *N);
442 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
443 SDValue visitUDIV(SDNode *N);
444 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
445 SDValue visitREM(SDNode *N);
446 SDValue visitMULHU(SDNode *N);
447 SDValue visitMULHS(SDNode *N);
448 SDValue visitAVG(SDNode *N);
449 SDValue visitABD(SDNode *N);
450 SDValue visitSMUL_LOHI(SDNode *N);
451 SDValue visitUMUL_LOHI(SDNode *N);
452 SDValue visitMULO(SDNode *N);
453 SDValue visitIMINMAX(SDNode *N);
454 SDValue visitAND(SDNode *N);
455 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitOR(SDNode *N);
457 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
458 SDValue visitXOR(SDNode *N);
459 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
460 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
461 SDValue visitSHL(SDNode *N);
462 SDValue visitSRA(SDNode *N);
463 SDValue visitSRL(SDNode *N);
464 SDValue visitFunnelShift(SDNode *N);
465 SDValue visitSHLSAT(SDNode *N);
466 SDValue visitRotate(SDNode *N);
467 SDValue visitABS(SDNode *N);
468 SDValue visitBSWAP(SDNode *N);
469 SDValue visitBITREVERSE(SDNode *N);
470 SDValue visitCTLZ(SDNode *N);
471 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
472 SDValue visitCTTZ(SDNode *N);
473 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
474 SDValue visitCTPOP(SDNode *N);
475 SDValue visitSELECT(SDNode *N);
476 SDValue visitVSELECT(SDNode *N);
477 SDValue visitVP_SELECT(SDNode *N);
478 SDValue visitSELECT_CC(SDNode *N);
479 SDValue visitSETCC(SDNode *N);
480 SDValue visitSETCCCARRY(SDNode *N);
481 SDValue visitSIGN_EXTEND(SDNode *N);
482 SDValue visitZERO_EXTEND(SDNode *N);
483 SDValue visitANY_EXTEND(SDNode *N);
484 SDValue visitAssertExt(SDNode *N);
485 SDValue visitAssertAlign(SDNode *N);
486 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
487 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
488 SDValue visitTRUNCATE(SDNode *N);
489 SDValue visitBITCAST(SDNode *N);
490 SDValue visitFREEZE(SDNode *N);
491 SDValue visitBUILD_PAIR(SDNode *N);
492 SDValue visitFADD(SDNode *N);
493 SDValue visitVP_FADD(SDNode *N);
494 SDValue visitVP_FSUB(SDNode *N);
495 SDValue visitSTRICT_FADD(SDNode *N);
496 SDValue visitFSUB(SDNode *N);
497 SDValue visitFMUL(SDNode *N);
498 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
499 SDValue visitFMAD(SDNode *N);
500 SDValue visitFDIV(SDNode *N);
501 SDValue visitFREM(SDNode *N);
502 SDValue visitFSQRT(SDNode *N);
503 SDValue visitFCOPYSIGN(SDNode *N);
504 SDValue visitFPOW(SDNode *N);
505 SDValue visitSINT_TO_FP(SDNode *N);
506 SDValue visitUINT_TO_FP(SDNode *N);
507 SDValue visitFP_TO_SINT(SDNode *N);
508 SDValue visitFP_TO_UINT(SDNode *N);
509 SDValue visitXRINT(SDNode *N);
510 SDValue visitFP_ROUND(SDNode *N);
511 SDValue visitFP_EXTEND(SDNode *N);
512 SDValue visitFNEG(SDNode *N);
513 SDValue visitFABS(SDNode *N);
514 SDValue visitFCEIL(SDNode *N);
515 SDValue visitFTRUNC(SDNode *N);
516 SDValue visitFFREXP(SDNode *N);
517 SDValue visitFFLOOR(SDNode *N);
518 SDValue visitFMinMax(SDNode *N);
519 SDValue visitBRCOND(SDNode *N);
520 SDValue visitBR_CC(SDNode *N);
521 SDValue visitLOAD(SDNode *N);
522
523 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
524 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
525 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
526
527 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
528
529 SDValue visitSTORE(SDNode *N);
530 SDValue visitATOMIC_STORE(SDNode *N);
531 SDValue visitLIFETIME_END(SDNode *N);
532 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
533 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
534 SDValue visitBUILD_VECTOR(SDNode *N);
535 SDValue visitCONCAT_VECTORS(SDNode *N);
536 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
537 SDValue visitVECTOR_SHUFFLE(SDNode *N);
538 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
539 SDValue visitINSERT_SUBVECTOR(SDNode *N);
540 SDValue visitMLOAD(SDNode *N);
541 SDValue visitMSTORE(SDNode *N);
542 SDValue visitMGATHER(SDNode *N);
543 SDValue visitMSCATTER(SDNode *N);
544 SDValue visitVPGATHER(SDNode *N);
545 SDValue visitVPSCATTER(SDNode *N);
546 SDValue visitVP_STRIDED_LOAD(SDNode *N);
547 SDValue visitVP_STRIDED_STORE(SDNode *N);
548 SDValue visitFP_TO_FP16(SDNode *N);
549 SDValue visitFP16_TO_FP(SDNode *N);
550 SDValue visitFP_TO_BF16(SDNode *N);
551 SDValue visitBF16_TO_FP(SDNode *N);
552 SDValue visitVECREDUCE(SDNode *N);
553 SDValue visitVPOp(SDNode *N);
554 SDValue visitGET_FPENV_MEM(SDNode *N);
555 SDValue visitSET_FPENV_MEM(SDNode *N);
556
557 template <class MatchContextClass>
558 SDValue visitFADDForFMACombine(SDNode *N);
559 template <class MatchContextClass>
560 SDValue visitFSUBForFMACombine(SDNode *N);
561 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
562
563 SDValue XformToShuffleWithZero(SDNode *N);
564 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
565 const SDLoc &DL,
566 SDNode *N,
567 SDValue N0,
568 SDValue N1);
569 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
570 SDValue N1, SDNodeFlags Flags);
571 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
572 SDValue N1, SDNodeFlags Flags);
573 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
574 EVT VT, SDValue N0, SDValue N1,
575 SDNodeFlags Flags = SDNodeFlags());
576
577 SDValue visitShiftByConstant(SDNode *N);
578
579 SDValue foldSelectOfConstants(SDNode *N);
580 SDValue foldVSelectOfConstants(SDNode *N);
581 SDValue foldBinOpIntoSelect(SDNode *BO);
582 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
583 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
584 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
585 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
586 SDValue N2, SDValue N3, ISD::CondCode CC,
587 bool NotExtCompare = false);
588 SDValue convertSelectOfFPConstantsToLoadOffset(
589 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
590 ISD::CondCode CC);
591 SDValue foldSignChangeInBitcast(SDNode *N);
592 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
593 SDValue N2, SDValue N3, ISD::CondCode CC);
594 SDValue foldSelectOfBinops(SDNode *N);
595 SDValue foldSextSetcc(SDNode *N);
596 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
597 const SDLoc &DL);
598 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
599 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
600 SDValue unfoldMaskedMerge(SDNode *N);
601 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
602 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
603 const SDLoc &DL, bool foldBooleans);
604 SDValue rebuildSetCC(SDValue N);
605
606 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
607 SDValue &CC, bool MatchStrict = false) const;
608 bool isOneUseSetCC(SDValue N) const;
609
610 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
611 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
612
613 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
614 unsigned HiOp);
615 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
616 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
617 const TargetLowering &TLI);
618
619 SDValue CombineExtLoad(SDNode *N);
620 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
621 SDValue combineRepeatedFPDivisors(SDNode *N);
622 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
623 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
624 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
625 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
626 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
627 SDValue BuildSDIV(SDNode *N);
628 SDValue BuildSDIVPow2(SDNode *N);
629 SDValue BuildUDIV(SDNode *N);
630 SDValue BuildSREMPow2(SDNode *N);
631 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
632 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
633 bool KnownNeverZero = false,
634 bool InexpensiveOnly = false,
635 std::optional<EVT> OutVT = std::nullopt);
636 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
637 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
638 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
639 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
640 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
641 SDNodeFlags Flags, bool Reciprocal);
642 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
643 SDNodeFlags Flags, bool Reciprocal);
644 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
645 bool DemandHighBits = true);
646 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
647 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
648 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
649 unsigned PosOpcode, unsigned NegOpcode,
650 const SDLoc &DL);
651 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
652 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
653 unsigned PosOpcode, unsigned NegOpcode,
654 const SDLoc &DL);
655 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
656 SDValue MatchLoadCombine(SDNode *N);
657 SDValue mergeTruncStores(StoreSDNode *N);
658 SDValue reduceLoadWidth(SDNode *N);
659 SDValue ReduceLoadOpStoreWidth(SDNode *N);
661 SDValue TransformFPLoadStorePair(SDNode *N);
662 SDValue convertBuildVecZextToZext(SDNode *N);
663 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
664 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
665 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
666 SDValue reduceBuildVecToShuffle(SDNode *N);
667 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
668 ArrayRef<int> VectorMask, SDValue VecIn1,
669 SDValue VecIn2, unsigned LeftIdx,
670 bool DidSplitVec);
671 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
672
673 /// Walk up chain skipping non-aliasing memory nodes,
674 /// looking for aliasing nodes and adding them to the Aliases vector.
675 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
676 SmallVectorImpl<SDValue> &Aliases);
677
678 /// Return true if there is any possibility that the two addresses overlap.
679 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
680
681 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
682 /// chain (aliasing node.)
683 SDValue FindBetterChain(SDNode *N, SDValue Chain);
684
685 /// Try to replace a store and any possibly adjacent stores on
686 /// consecutive chains with better chains. Return true only if St is
687 /// replaced.
688 ///
689 /// Notice that other chains may still be replaced even if the function
690 /// returns false.
691 bool findBetterNeighborChains(StoreSDNode *St);
692
693 // Helper for findBetterNeighborChains. Walk up the store chain and add
694 // additional chained stores that do not overlap and can be parallelized.
695 bool parallelizeChainedStores(StoreSDNode *St);
696
697 /// Holds a pointer to an LSBaseSDNode as well as information on where it
698 /// is located in a sequence of memory operations connected by a chain.
699 struct MemOpLink {
700 // Ptr to the mem node.
701 LSBaseSDNode *MemNode;
702
703 // Offset from the base ptr.
704 int64_t OffsetFromBase;
705
706 MemOpLink(LSBaseSDNode *N, int64_t Offset)
707 : MemNode(N), OffsetFromBase(Offset) {}
708 };
709
710 // Classify the origin of a stored value.
711 enum class StoreSource { Unknown, Constant, Extract, Load };
712 StoreSource getStoreSource(SDValue StoreVal) {
713 switch (StoreVal.getOpcode()) {
714 case ISD::Constant:
715 case ISD::ConstantFP:
716 return StoreSource::Constant;
717 case ISD::BUILD_VECTOR:
718 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
719 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
720 return StoreSource::Constant;
721 return StoreSource::Unknown;
722 case ISD::EXTRACT_VECTOR_ELT:
723 case ISD::EXTRACT_SUBVECTOR:
724 return StoreSource::Extract;
725 case ISD::LOAD:
726 return StoreSource::Load;
727 default:
728 return StoreSource::Unknown;
729 }
730 }
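 // The classification above feeds mergeConsecutiveStores: runs of stores whose
 // values are all Constant, all Extract, or all Load are handled by the
 // corresponding tryStoreMergeOf* helpers declared below, while a value
 // classified as Unknown is not considered a merge candidate.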
731
732 /// This is a helper function for visitMUL to check the profitability
733 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
734 /// MulNode is the original multiply, AddNode is (add x, c1),
735 /// and ConstNode is c2.
736 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
737 SDValue ConstNode);
738
739 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
740 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
741 /// the type of the loaded value to be extended.
742 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
743 EVT LoadResultTy, EVT &ExtVT);
744
745 /// Helper function to calculate whether the given Load/Store can have its
746 /// width reduced to ExtVT.
747 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
748 EVT &MemVT, unsigned ShAmt = 0);
749
750 /// Used by BackwardsPropagateMask to find suitable loads.
751 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
752 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
753 ConstantSDNode *Mask, SDNode *&NodeToMask);
754 /// Attempt to propagate a given AND node back to load leaves so that they
755 /// can be combined into narrow loads.
756 bool BackwardsPropagateMask(SDNode *N);
757
758 /// Helper function for mergeConsecutiveStores which merges the component
759 /// store chains.
760 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
761 unsigned NumStores);
762
763 /// Helper function for mergeConsecutiveStores which checks if all the store
764 /// nodes have the same underlying object. We can still reuse the first
765 /// store's pointer info if all the stores are from the same object.
766 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
767
768 /// This is a helper function for mergeConsecutiveStores. When the source
769 /// elements of the consecutive stores are all constants or all extracted
770 /// vector elements, try to merge them into one larger store introducing
771 /// bitcasts if necessary. \return True if a merged store was created.
772 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
773 EVT MemVT, unsigned NumStores,
774 bool IsConstantSrc, bool UseVector,
775 bool UseTrunc);
776
777 /// This is a helper function for mergeConsecutiveStores. Stores that
778 /// potentially may be merged with St are placed in StoreNodes. RootNode is
779 /// a chain predecessor to all store candidates.
780 void getStoreMergeCandidates(StoreSDNode *St,
781 SmallVectorImpl<MemOpLink> &StoreNodes,
782 SDNode *&Root);
783
784 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
785 /// have indirect dependency through their operands. RootNode is the
786 /// predecessor to all stores calculated by getStoreMergeCandidates and is
787 /// used to prune the dependency check. \return True if safe to merge.
788 bool checkMergeStoreCandidatesForDependencies(
789 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
790 SDNode *RootNode);
791
792 /// This is a helper function for mergeConsecutiveStores. Given a list of
793 /// store candidates, find the first N that are consecutive in memory.
794 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
795 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
796 int64_t ElementSizeBytes) const;
797
798 /// This is a helper function for mergeConsecutiveStores. It is used for
799 /// store chains that are composed entirely of constant values.
800 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
801 unsigned NumConsecutiveStores,
802 EVT MemVT, SDNode *Root, bool AllowVectors);
803
804 /// This is a helper function for mergeConsecutiveStores. It is used for
805 /// store chains that are composed entirely of extracted vector elements.
806 /// When extracting multiple vector elements, try to store them in one
807 /// vector store rather than a sequence of scalar stores.
808 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
809 unsigned NumConsecutiveStores, EVT MemVT,
810 SDNode *Root);
811
812 /// This is a helper function for mergeConsecutiveStores. It is used for
813 /// store chains that are composed entirely of loaded values.
814 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
815 unsigned NumConsecutiveStores, EVT MemVT,
816 SDNode *Root, bool AllowVectors,
817 bool IsNonTemporalStore, bool IsNonTemporalLoad);
818
819 /// Merge consecutive store operations into a wide store.
820 /// This optimization uses wide integers or vectors when possible.
821 /// \return true if stores were merged.
822 bool mergeConsecutiveStores(StoreSDNode *St);
823
824 /// Try to transform a truncation where C is a constant:
825 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
826 ///
827 /// \p N needs to be a truncation and its first operand an AND. Other
828 /// requirements are checked by the function (e.g. that the trunc is
829 /// single-use); if they are not met, an empty SDValue is returned.
830 SDValue distributeTruncateThroughAnd(SDNode *N);
831
832 /// Helper function to determine whether the target supports operation
833 /// given by \p Opcode for type \p VT, that is, whether the operation
834 /// is legal or custom before legalizing operations, and whether it is
835 /// legal (but not custom) after legalization.
836 bool hasOperation(unsigned Opcode, EVT VT) {
837 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
838 }
839
840 public:
841 /// Runs the dag combiner on all nodes in the work list
842 void Run(CombineLevel AtLevel);
843
844 SelectionDAG &getDAG() const { return DAG; }
845
846 /// Returns a type large enough to hold any valid shift amount - before type
847 /// legalization these can be huge.
848 EVT getShiftAmountTy(EVT LHSTy) {
849 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
850 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
851 }
852
853 /// This method returns true if we are running before type legalization or
854 /// if the specified VT is legal.
855 bool isTypeLegal(const EVT &VT) {
856 if (!LegalTypes) return true;
857 return TLI.isTypeLegal(VT);
858 }
859
860 /// Convenience wrapper around TargetLowering::getSetCCResultType
861 EVT getSetCCResultType(EVT VT) const {
862 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
863 }
864
865 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
866 SDValue OrigLoad, SDValue ExtLoad,
867 ISD::NodeType ExtType);
868 };
869
870/// This class is a DAGUpdateListener that removes any deleted
871/// nodes from the worklist.
872class WorklistRemover : public SelectionDAG::DAGUpdateListener {
873 DAGCombiner &DC;
874
875public:
876 explicit WorklistRemover(DAGCombiner &dc)
877 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
878
879 void NodeDeleted(SDNode *N, SDNode *E) override {
880 DC.removeFromWorklist(N);
881 }
882};
883
884class WorklistInserter : public SelectionDAG::DAGUpdateListener {
885 DAGCombiner &DC;
886
887public:
888 explicit WorklistInserter(DAGCombiner &dc)
889 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
890
891 // FIXME: Ideally we could add N to the worklist, but this causes exponential
892 // compile time costs in large DAGs, e.g. Halide.
893 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
894};
895
896} // end anonymous namespace
897
898//===----------------------------------------------------------------------===//
899// TargetLowering::DAGCombinerInfo implementation
900//===----------------------------------------------------------------------===//
901
902void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
903 ((DAGCombiner*)DC)->AddToWorklist(N);
904}
905
906SDValue TargetLowering::DAGCombinerInfo::
907CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
908 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
909}
910
911SDValue TargetLowering::DAGCombinerInfo::
912CombineTo(SDNode *N, SDValue Res, bool AddTo) {
913 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
914}
915
916SDValue TargetLowering::DAGCombinerInfo::
917CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
918 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
919}
920
921bool TargetLowering::DAGCombinerInfo::
922recursivelyDeleteUnusedNodes(SDNode *N) {
923 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
924}
925
926void TargetLowering::DAGCombinerInfo::
927CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
928 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
929}
930
931//===----------------------------------------------------------------------===//
932// Helper Functions
933//===----------------------------------------------------------------------===//
934
935void DAGCombiner::deleteAndRecombine(SDNode *N) {
936 removeFromWorklist(N);
937
938 // If the operands of this node are only used by the node, they will now be
939 // dead. Make sure to re-visit them and recursively delete dead nodes.
940 for (const SDValue &Op : N->ops())
941 // For an operand generating multiple values, one of the values may
942 // become dead allowing further simplification (e.g. split index
943 // arithmetic from an indexed load).
944 if (Op->hasOneUse() || Op->getNumValues() > 1)
945 AddToWorklist(Op.getNode());
946
947 DAG.DeleteNode(N);
948}
949
950// APInts must be the same size for most operations; this helper
951// function zero extends the shorter of the pair so that they match.
952// We provide an Offset so that we can create bitwidths that won't overflow.
953static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
954 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
955 LHS = LHS.zext(Bits);
956 RHS = RHS.zext(Bits);
957}
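// For example, an 8-bit LHS and a 16-bit RHS with Offset == 1 are both
// zero-extended to 17 bits, leaving one bit of headroom so that arithmetic
// performed on the pair afterwards cannot overflow.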
958
959// Return true if this node is a setcc, or is a select_cc
960// that selects between the target values used for true and false, making it
961// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
962// the appropriate nodes based on the type of node we are checking. This
963// simplifies life a bit for the callers.
964bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
965 SDValue &CC, bool MatchStrict) const {
966 if (N.getOpcode() == ISD::SETCC) {
967 LHS = N.getOperand(0);
968 RHS = N.getOperand(1);
969 CC = N.getOperand(2);
970 return true;
971 }
972
973 if (MatchStrict &&
974 (N.getOpcode() == ISD::STRICT_FSETCC ||
975 N.getOpcode() == ISD::STRICT_FSETCCS)) {
976 LHS = N.getOperand(1);
977 RHS = N.getOperand(2);
978 CC = N.getOperand(3);
979 return true;
980 }
981
982 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
983 !TLI.isConstFalseVal(N.getOperand(3)))
984 return false;
985
986 if (TLI.getBooleanContents(N.getValueType()) ==
987 TargetLowering::UndefinedBooleanContent)
988 return false;
989
990 LHS = N.getOperand(0);
991 RHS = N.getOperand(1);
992 CC = N.getOperand(4);
993 return true;
994}
995
996/// Return true if this is a SetCC-equivalent operation with only one use.
997/// If this is true, it allows the users to invert the operation for free when
998/// it is profitable to do so.
999bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1000 SDValue N0, N1, N2;
1001 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1002 return true;
1003 return false;
1004}
1005
1006static bool isConstantSplatVectorMaskForType(SDValue N, EVT ScalarTy) {
1007 if (!ScalarTy.isSimple())
1008 return false;
1009
1010 uint64_t MaskForTy = 0ULL;
1011 switch (ScalarTy.getSimpleVT().SimpleTy) {
1012 case MVT::i8:
1013 MaskForTy = 0xFFULL;
1014 break;
1015 case MVT::i16:
1016 MaskForTy = 0xFFFFULL;
1017 break;
1018 case MVT::i32:
1019 MaskForTy = 0xFFFFFFFFULL;
1020 break;
1021 default:
1022 return false;
1023 break;
1024 }
1025
1026 APInt Val;
1027 if (ISD::isConstantSplatVector(N, Val))
1028 return Val.getLimitedValue() == MaskForTy;
1029
1030 return false;
1031}
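// For example, with ScalarTy == MVT::i16 a splat of 0xFFFF is recognized as an
// all-ones mask for that element type, whereas a splat of 0x00FF or a
// non-splat vector is not.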
1032
1033// Determines if it is a constant integer or a splat/build vector of constant
1034// integers (and undefs).
1035// Does not permit build vector implicit truncation.
1036static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1037 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1038 return !(Const->isOpaque() && NoOpaques);
1039 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1040 return false;
1041 unsigned BitWidth = N.getScalarValueSizeInBits();
1042 for (const SDValue &Op : N->op_values()) {
1043 if (Op.isUndef())
1044 continue;
1045 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1046 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1047 (Const->isOpaque() && NoOpaques))
1048 return false;
1049 }
1050 return true;
1051}
1052
1053// Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1054// with undefs.
1055static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1056 if (V.getOpcode() != ISD::BUILD_VECTOR)
1057 return false;
1058 return isConstantOrConstantVector(V, NoOpaques) ||
1059 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1060}
1061
1062// Determine if this is an indexed load with an opaque target constant index.
1063static bool canSplitIdx(LoadSDNode *LD) {
1064 return MaySplitLoadIndex &&
1065 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1066 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1067}
1068
1069bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1070 const SDLoc &DL,
1071 SDNode *N,
1072 SDValue N0,
1073 SDValue N1) {
1074 // Currently this only tries to ensure we don't undo the GEP splits done by
1075 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1076 // we check if the following transformation would be problematic:
1077 // (load/store (add, (add, x, offset1), offset2)) ->
1078 // (load/store (add, x, offset1+offset2)).
1079
1080 // (load/store (add, (add, x, y), offset2)) ->
1081 // (load/store (add, (add, x, offset2), y)).
1082
1083 if (N0.getOpcode() != ISD::ADD)
1084 return false;
1085
1086 // Check for vscale addressing modes.
1087 // (load/store (add/sub (add x, y), vscale))
1088 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1089 // (load/store (add/sub (add x, y), (mul vscale, C)))
1090 if ((N1.getOpcode() == ISD::VSCALE ||
1091 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1092 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1093 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1094 N1.getValueType().getFixedSizeInBits() <= 64) {
1095 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1096 ? N1.getConstantOperandVal(0)
1097 : (N1.getOperand(0).getConstantOperandVal(0) *
1098 (N1.getOpcode() == ISD::SHL
1099 ? (1LL << N1.getConstantOperandVal(1))
1100 : N1.getConstantOperandVal(1)));
1101 if (Opc == ISD::SUB)
1102 ScalableOffset = -ScalableOffset;
1103 if (all_of(N->uses(), [&](SDNode *Node) {
1104 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1105 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1106 TargetLoweringBase::AddrMode AM;
1107 AM.HasBaseReg = true;
1108 AM.ScalableOffset = ScalableOffset;
1109 EVT VT = LoadStore->getMemoryVT();
1110 unsigned AS = LoadStore->getAddressSpace();
1111 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1112 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1113 AS);
1114 }
1115 return false;
1116 }))
1117 return true;
1118 }
1119
1120 if (Opc != ISD::ADD)
1121 return false;
1122
1123 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1124 if (!C2)
1125 return false;
1126
1127 const APInt &C2APIntVal = C2->getAPIntValue();
1128 if (C2APIntVal.getSignificantBits() > 64)
1129 return false;
1130
1131 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1132 if (N0.hasOneUse())
1133 return false;
1134
1135 const APInt &C1APIntVal = C1->getAPIntValue();
1136 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1137 if (CombinedValueIntVal.getSignificantBits() > 64)
1138 return false;
1139 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1140
1141 for (SDNode *Node : N->uses()) {
1142 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1143 // Is x[offset2] already not a legal addressing mode? If so then
1144 // reassociating the constants breaks nothing (we test offset2 because
1145 // that's the one we hope to fold into the load or store).
1146 TargetLoweringBase::AddrMode AM;
1147 AM.HasBaseReg = true;
1148 AM.BaseOffs = C2APIntVal.getSExtValue();
1149 EVT VT = LoadStore->getMemoryVT();
1150 unsigned AS = LoadStore->getAddressSpace();
1151 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1152 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1153 continue;
1154
1155 // Would x[offset1+offset2] still be a legal addressing mode?
1156 AM.BaseOffs = CombinedValue;
1157 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1158 return true;
1159 }
1160 }
1161 } else {
1162 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1163 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1164 return false;
1165
1166 for (SDNode *Node : N->uses()) {
1167 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1168 if (!LoadStore)
1169 return false;
1170
1171 // Is x[offset2] a legal addressing mode? If so then
1172 // reassociating the constants breaks the address pattern.
1173 TargetLoweringBase::AddrMode AM;
1174 AM.HasBaseReg = true;
1175 AM.BaseOffs = C2APIntVal.getSExtValue();
1176 EVT VT = LoadStore->getMemoryVT();
1177 unsigned AS = LoadStore->getAddressSpace();
1178 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1179 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1180 return false;
1181 }
1182 return true;
1183 }
1184
1185 return false;
1186}
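// Illustrative example (hypothetical target): if only immediate offsets in
// [0, 4095] fold into a load, then rewriting (load (add (add x, 4000), 200))
// as (load (add x, 4200)) pushes the combined offset out of range, so the
// checks above report that reassociation would break the addressing-mode
// pattern.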
1187
1188/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1189/// \p N0 is the same kind of operation as \p Opc.
1190SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1191 SDValue N0, SDValue N1,
1192 SDNodeFlags Flags) {
1193 EVT VT = N0.getValueType();
1194
1195 if (N0.getOpcode() != Opc)
1196 return SDValue();
1197
1198 SDValue N00 = N0.getOperand(0);
1199 SDValue N01 = N0.getOperand(1);
1200
1201 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1202 SDNodeFlags NewFlags;
1203 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1204 Flags.hasNoUnsignedWrap())
1205 NewFlags.setNoUnsignedWrap(true);
1206
1207 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1208 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1209 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1210 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1211 return SDValue();
1212 }
1213 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1214 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1215 // iff (op x, c1) has one use
1216 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1217 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1218 }
1219 }
1220
1221 // Check for repeated operand logic simplifications.
1222 if (Opc == ISD::AND || Opc == ISD::OR) {
1223 // (N00 & N01) & N00 --> N00 & N01
1224 // (N00 & N01) & N01 --> N00 & N01
1225 // (N00 | N01) | N00 --> N00 | N01
1226 // (N00 | N01) | N01 --> N00 | N01
1227 if (N1 == N00 || N1 == N01)
1228 return N0;
1229 }
1230 if (Opc == ISD::XOR) {
1231 // (N00 ^ N01) ^ N00 --> N01
1232 if (N1 == N00)
1233 return N01;
1234 // (N00 ^ N01) ^ N01 --> N00
1235 if (N1 == N01)
1236 return N00;
1237 }
1238
1239 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1240 if (N1 != N01) {
1241 // Reassociate if (op N00, N1) already exists
1242 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1243 // If (Op (Op N00, N1), N01) already exists, we need to stop
1244 // reassociating to avoid an infinite loop.
1245 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1246 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1247 }
1248 }
1249
1250 if (N1 != N00) {
1251 // Reassociate if (op N01, N1) already exists
1252 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1253 // If (Op (Op N01, N1), N00) already exists, we need to stop
1254 // reassociating to avoid an infinite loop.
1255 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1256 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1257 }
1258 }
1259
1260 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1261 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1262 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1263 // comparisons with the same predicate. This enables optimizations as the
1264 // following one:
1265 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1266 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1267 if (Opc == ISD::AND || Opc == ISD::OR) {
1268 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1269 N01->getOpcode() == ISD::SETCC) {
1270 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1271 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1272 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1273 if (CC1 == CC00 && CC1 != CC01) {
1274 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1275 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1276 }
1277 if (CC1 == CC01 && CC1 != CC00) {
1278 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1279 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1280 }
1281 }
1282 }
1283 }
1284
1285 return SDValue();
1286}
1287
1288/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1289/// same kind of operation as \p Opc.
1290SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1291 SDValue N1, SDNodeFlags Flags) {
1292 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1293
1294 // Floating-point reassociation is not allowed without loose FP math.
1295 if (N0.getValueType().isFloatingPoint() ||
1296 N1.getValueType().isFloatingPoint())
1297 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1298 return SDValue();
1299
1300 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1301 return Combined;
1302 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1303 return Combined;
1304 return SDValue();
1305}
1306
1307// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1308// Note that we only expect Flags to be passed from FP operations. For integer
1309// operations they need to be dropped.
1310SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1311 const SDLoc &DL, EVT VT, SDValue N0,
1312 SDValue N1, SDNodeFlags Flags) {
1313 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1314 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1315 N0->hasOneUse() && N1->hasOneUse() &&
1317 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1318 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1319 return DAG.getNode(RedOpc, DL, VT,
1320 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1321 N0.getOperand(0), N1.getOperand(0)));
1322 }
1323 return SDValue();
1324}
1325
1326SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1327 bool AddTo) {
1328 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1329 ++NodesCombined;
1330 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1331 To[0].dump(&DAG);
1332 dbgs() << " and " << NumTo - 1 << " other values\n");
1333 for (unsigned i = 0, e = NumTo; i != e; ++i)
1334 assert((!To[i].getNode() ||
1335 N->getValueType(i) == To[i].getValueType()) &&
1336 "Cannot combine value to value of different type!");
1337
1338 WorklistRemover DeadNodes(*this);
1339 DAG.ReplaceAllUsesWith(N, To);
1340 if (AddTo) {
1341 // Push the new nodes and any users onto the worklist
1342 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1343 if (To[i].getNode())
1344 AddToWorklistWithUsers(To[i].getNode());
1345 }
1346 }
1347
1348 // Finally, if the node is now dead, remove it from the graph. The node
1349 // may not be dead if the replacement process recursively simplified to
1350 // something else needing this node.
1351 if (N->use_empty())
1352 deleteAndRecombine(N);
1353 return SDValue(N, 0);
1354}
1355
1356void DAGCombiner::
1357CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1358 // Replace the old value with the new one.
1359 ++NodesCombined;
1360 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1361 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1362
1363 // Replace all uses.
1364 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1365
1366 // Push the new node and any (possibly new) users onto the worklist.
1367 AddToWorklistWithUsers(TLO.New.getNode());
1368
1369 // Finally, if the node is now dead, remove it from the graph.
1370 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1371}
1372
1373/// Check the specified integer node value to see if it can be simplified or if
1374/// things it uses can be simplified by bit propagation. If so, return true.
1375bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1376 const APInt &DemandedElts,
1377 bool AssumeSingleUse) {
1378 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1379 KnownBits Known;
1380 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1381 AssumeSingleUse))
1382 return false;
1383
1384 // Revisit the node.
1385 AddToWorklist(Op.getNode());
1386
1387 CommitTargetLoweringOpt(TLO);
1388 return true;
1389}
1390
1391/// Check the specified vector node value to see if it can be simplified or
1392/// if things it uses can be simplified as it only uses some of the elements.
1393/// If so, return true.
1394bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1395 const APInt &DemandedElts,
1396 bool AssumeSingleUse) {
1397 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1398 APInt KnownUndef, KnownZero;
1399 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1400 TLO, 0, AssumeSingleUse))
1401 return false;
1402
1403 // Revisit the node.
1404 AddToWorklist(Op.getNode());
1405
1406 CommitTargetLoweringOpt(TLO);
1407 return true;
1408}
1409
1410void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1411 SDLoc DL(Load);
1412 EVT VT = Load->getValueType(0);
1413 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1414
1415 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1416 Trunc.dump(&DAG); dbgs() << '\n');
1417
1418 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1419 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1420
1421 AddToWorklist(Trunc.getNode());
1422 recursivelyDeleteUnusedNodes(Load);
1423}
1424
1425SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1426 Replace = false;
1427 SDLoc DL(Op);
1428 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1429 LoadSDNode *LD = cast<LoadSDNode>(Op);
1430 EVT MemVT = LD->getMemoryVT();
1431 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1432 : LD->getExtensionType();
1433 Replace = true;
1434 return DAG.getExtLoad(ExtType, DL, PVT,
1435 LD->getChain(), LD->getBasePtr(),
1436 MemVT, LD->getMemOperand());
1437 }
1438
1439 unsigned Opc = Op.getOpcode();
1440 switch (Opc) {
1441 default: break;
1442 case ISD::AssertSext:
1443 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1444 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1445 break;
1446 case ISD::AssertZext:
1447 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1448 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1449 break;
1450 case ISD::Constant: {
1451 unsigned ExtOpc =
1452 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1453 return DAG.getNode(ExtOpc, DL, PVT, Op);
1454 }
1455 }
1456
1457 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1458 return SDValue();
1459 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1460}
1461
1462SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1463 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1464 return SDValue();
1465 EVT OldVT = Op.getValueType();
1466 SDLoc DL(Op);
1467 bool Replace = false;
1468 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1469 if (!NewOp.getNode())
1470 return SDValue();
1471 AddToWorklist(NewOp.getNode());
1472
1473 if (Replace)
1474 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1475 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1476 DAG.getValueType(OldVT));
1477}
1478
1479SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1480 EVT OldVT = Op.getValueType();
1481 SDLoc DL(Op);
1482 bool Replace = false;
1483 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1484 if (!NewOp.getNode())
1485 return SDValue();
1486 AddToWorklist(NewOp.getNode());
1487
1488 if (Replace)
1489 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1490 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1491}
1492
1493/// Promote the specified integer binary operation if the target indicates it is
1494/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1495/// i32 since i16 instructions are longer.
1496SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1497 if (!LegalOperations)
1498 return SDValue();
1499
1500 EVT VT = Op.getValueType();
1501 if (VT.isVector() || !VT.isInteger())
1502 return SDValue();
1503
1504 // If operation type is 'undesirable', e.g. i16 on x86, consider
1505 // promoting it.
1506 unsigned Opc = Op.getOpcode();
1507 if (TLI.isTypeDesirableForOp(Opc, VT))
1508 return SDValue();
1509
1510 EVT PVT = VT;
1511 // Consult target whether it is a good idea to promote this operation and
1512 // what's the right type to promote it to.
1513 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1514 assert(PVT != VT && "Don't know what type to promote to!");
1515
1516 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1517
1518 bool Replace0 = false;
1519 SDValue N0 = Op.getOperand(0);
1520 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1521
1522 bool Replace1 = false;
1523 SDValue N1 = Op.getOperand(1);
1524 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1525 SDLoc DL(Op);
1526
1527 SDValue RV =
1528 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1529
1530 // We are always replacing N0/N1's use in N and only need additional
1531 // replacements if there are additional uses.
1532 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1533 // (SDValue) here because the node may reference multiple values
1534 // (for example, the chain value of a load node).
1535 Replace0 &= !N0->hasOneUse();
1536 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1537
1538 // Combine Op here so it is preserved past replacements.
1539 CombineTo(Op.getNode(), RV);
1540
1541 // If operands have a use ordering, make sure we deal with
1542 // predecessor first.
1543 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1544 std::swap(N0, N1);
1545 std::swap(NN0, NN1);
1546 }
1547
1548 if (Replace0) {
1549 AddToWorklist(NN0.getNode());
1550 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1551 }
1552 if (Replace1) {
1553 AddToWorklist(NN1.getNode());
1554 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1555 }
1556 return Op;
1557 }
1558 return SDValue();
1559}
1560
1561/// Promote the specified integer shift operation if the target indicates it is
1562/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1563/// i32 since i16 instructions are longer.
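// Illustrative sketch only (an assumption for clarity): a right shift must keep
// the shifted value's high bits meaningful after widening, so an i16 SRA is
// rebuilt roughly as
//   (i16 sra x, c) --> (i16 truncate (i32 sra (sign_extend_inreg x', i16), c))
// while SRL uses a zero-extended form of the operand instead.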
1564SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1565 if (!LegalOperations)
1566 return SDValue();
1567
1568 EVT VT = Op.getValueType();
1569 if (VT.isVector() || !VT.isInteger())
1570 return SDValue();
1571
1572 // If operation type is 'undesirable', e.g. i16 on x86, consider
1573 // promoting it.
1574 unsigned Opc = Op.getOpcode();
1575 if (TLI.isTypeDesirableForOp(Opc, VT))
1576 return SDValue();
1577
1578 EVT PVT = VT;
1579 // Consult target whether it is a good idea to promote this operation and
1580 // what's the right type to promote it to.
1581 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1582 assert(PVT != VT && "Don't know what type to promote to!");
1583
1584 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1585
1586 bool Replace = false;
1587 SDValue N0 = Op.getOperand(0);
1588 if (Opc == ISD::SRA)
1589 N0 = SExtPromoteOperand(N0, PVT);
1590 else if (Opc == ISD::SRL)
1591 N0 = ZExtPromoteOperand(N0, PVT);
1592 else
1593 N0 = PromoteOperand(N0, PVT, Replace);
1594
1595 if (!N0.getNode())
1596 return SDValue();
1597
1598 SDLoc DL(Op);
1599 SDValue N1 = Op.getOperand(1);
1600 SDValue RV =
1601 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1602
1603 if (Replace)
1604 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1605
1606 // Deal with Op being deleted.
1607 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1608 return RV;
1609 }
1610 return SDValue();
1611}
1612
1613SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1614 if (!LegalOperations)
1615 return SDValue();
1616
1617 EVT VT = Op.getValueType();
1618 if (VT.isVector() || !VT.isInteger())
1619 return SDValue();
1620
1621 // If operation type is 'undesirable', e.g. i16 on x86, consider
1622 // promoting it.
1623 unsigned Opc = Op.getOpcode();
1624 if (TLI.isTypeDesirableForOp(Opc, VT))
1625 return SDValue();
1626
1627 EVT PVT = VT;
1628 // Consult target whether it is a good idea to promote this operation and
1629 // what's the right type to promote it to.
1630 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1631 assert(PVT != VT && "Don't know what type to promote to!");
1632 // fold (aext (aext x)) -> (aext x)
1633 // fold (aext (zext x)) -> (zext x)
1634 // fold (aext (sext x)) -> (sext x)
1635 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1636 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1637 }
1638 return SDValue();
1639}
1640
1641bool DAGCombiner::PromoteLoad(SDValue Op) {
1642 if (!LegalOperations)
1643 return false;
1644
1645 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1646 return false;
1647
1648 EVT VT = Op.getValueType();
1649 if (VT.isVector() || !VT.isInteger())
1650 return false;
1651
1652 // If operation type is 'undesirable', e.g. i16 on x86, consider
1653 // promoting it.
1654 unsigned Opc = Op.getOpcode();
1655 if (TLI.isTypeDesirableForOp(Opc, VT))
1656 return false;
1657
1658 EVT PVT = VT;
1659 // Consult target whether it is a good idea to promote this operation and
1660 // what's the right type to promote it to.
1661 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1662 assert(PVT != VT && "Don't know what type to promote to!");
1663
1664 SDLoc DL(Op);
1665 SDNode *N = Op.getNode();
1666 LoadSDNode *LD = cast<LoadSDNode>(N);
1667 EVT MemVT = LD->getMemoryVT();
1668    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1669                                                       : LD->getExtensionType();
1670 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1671 LD->getChain(), LD->getBasePtr(),
1672 MemVT, LD->getMemOperand());
1673 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1674
1675 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1676 Result.dump(&DAG); dbgs() << '\n');
1677
1678    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1679    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1680
1681 AddToWorklist(Result.getNode());
1682 recursivelyDeleteUnusedNodes(N);
1683 return true;
1684 }
1685
1686 return false;
1687}
1688
1689/// Recursively delete a node which has no uses and any operands for
1690/// which it is the only use.
1691///
1692/// Note that this both deletes the nodes and removes them from the worklist.
1693/// It also adds any nodes that have had a user deleted to the worklist, as they
1694/// may now have only one use and be subject to other combines.
1695bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1696 if (!N->use_empty())
1697 return false;
1698
1699  SmallSetVector<SDNode *, 16> Nodes;
1700  Nodes.insert(N);
1701 do {
1702 N = Nodes.pop_back_val();
1703 if (!N)
1704 continue;
1705
1706 if (N->use_empty()) {
1707 for (const SDValue &ChildN : N->op_values())
1708 Nodes.insert(ChildN.getNode());
1709
1710 removeFromWorklist(N);
1711 DAG.DeleteNode(N);
1712 } else {
1713 AddToWorklist(N);
1714 }
1715 } while (!Nodes.empty());
1716 return true;
1717}
1718
1719//===----------------------------------------------------------------------===//
1720// Main DAG Combiner implementation
1721//===----------------------------------------------------------------------===//
1722
1723void DAGCombiner::Run(CombineLevel AtLevel) {
1724  // Set the instance variables, so that the various visit routines may use them.
1725 Level = AtLevel;
1726 LegalDAG = Level >= AfterLegalizeDAG;
1727 LegalOperations = Level >= AfterLegalizeVectorOps;
1728 LegalTypes = Level >= AfterLegalizeTypes;
1729
1730 WorklistInserter AddNodes(*this);
1731
1732 // Add all the dag nodes to the worklist.
1733 //
1734  // Note: Not all nodes are added to the PruningList here; only nodes with no
1735  // uses can be deleted, and every other node that the first call to
1736  // getNextWorklistEntry would otherwise add to the worklist is already
1737  // present in it.
1738 for (SDNode &Node : DAG.allnodes())
1739 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1740
1741 // Create a dummy node (which is not added to allnodes), that adds a reference
1742 // to the root node, preventing it from being deleted, and tracking any
1743 // changes of the root.
1744 HandleSDNode Dummy(DAG.getRoot());
1745
1746 // While we have a valid worklist entry node, try to combine it.
1747 while (SDNode *N = getNextWorklistEntry()) {
1748 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1749 // N is deleted from the DAG, since they too may now be dead or may have a
1750 // reduced number of uses, allowing other xforms.
1751 if (recursivelyDeleteUnusedNodes(N))
1752 continue;
1753
1754 WorklistRemover DeadNodes(*this);
1755
1756 // If this combine is running after legalizing the DAG, re-legalize any
1757 // nodes pulled off the worklist.
1758 if (LegalDAG) {
1759 SmallSetVector<SDNode *, 16> UpdatedNodes;
1760 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1761
1762 for (SDNode *LN : UpdatedNodes)
1763 AddToWorklistWithUsers(LN);
1764
1765 if (!NIsValid)
1766 continue;
1767 }
1768
1769 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1770
1771 // Add any operands of the new node which have not yet been combined to the
1772 // worklist as well. getNextWorklistEntry flags nodes that have been
1773 // combined before. Because the worklist uniques things already, this won't
1774 // repeatedly process the same operand.
1775 for (const SDValue &ChildN : N->op_values())
1776 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1777 /*SkipIfCombinedBefore=*/true);
1778
1779 SDValue RV = combine(N);
1780
1781 if (!RV.getNode())
1782 continue;
1783
1784 ++NodesCombined;
1785
1786 // If we get back the same node we passed in, rather than a new node or
1787 // zero, we know that the node must have defined multiple values and
1788 // CombineTo was used. Since CombineTo takes care of the worklist
1789 // mechanics for us, we have no work to do in this case.
1790 if (RV.getNode() == N)
1791 continue;
1792
1793 assert(N->getOpcode() != ISD::DELETED_NODE &&
1794 RV.getOpcode() != ISD::DELETED_NODE &&
1795 "Node was deleted but visit returned new node!");
1796
1797 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1798
1799 if (N->getNumValues() == RV->getNumValues())
1800 DAG.ReplaceAllUsesWith(N, RV.getNode());
1801 else {
1802 assert(N->getValueType(0) == RV.getValueType() &&
1803 N->getNumValues() == 1 && "Type mismatch");
1804 DAG.ReplaceAllUsesWith(N, &RV);
1805 }
1806
1807 // Push the new node and any users onto the worklist. Omit this if the
1808 // new node is the EntryToken (e.g. if a store managed to get optimized
1809 // out), because re-visiting the EntryToken and its users will not uncover
1810 // any additional opportunities, but there may be a large number of such
1811 // users, potentially causing compile time explosion.
1812 if (RV.getOpcode() != ISD::EntryToken)
1813 AddToWorklistWithUsers(RV.getNode());
1814
1815 // Finally, if the node is now dead, remove it from the graph. The node
1816 // may not be dead if the replacement process recursively simplified to
1817 // something else needing this node. This will also take care of adding any
1818 // operands which have lost a user to the worklist.
1819 recursivelyDeleteUnusedNodes(N);
1820 }
1821
1822  // If the root changed (e.g. it was a dead load), update the root.
1823 DAG.setRoot(Dummy.getValue());
1824 DAG.RemoveDeadNodes();
1825}
1826
1827SDValue DAGCombiner::visit(SDNode *N) {
1828 // clang-format off
1829 switch (N->getOpcode()) {
1830 default: break;
1831 case ISD::TokenFactor: return visitTokenFactor(N);
1832 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1833 case ISD::ADD: return visitADD(N);
1834 case ISD::SUB: return visitSUB(N);
1835 case ISD::SADDSAT:
1836 case ISD::UADDSAT: return visitADDSAT(N);
1837 case ISD::SSUBSAT:
1838 case ISD::USUBSAT: return visitSUBSAT(N);
1839 case ISD::ADDC: return visitADDC(N);
1840 case ISD::SADDO:
1841 case ISD::UADDO: return visitADDO(N);
1842 case ISD::SUBC: return visitSUBC(N);
1843 case ISD::SSUBO:
1844 case ISD::USUBO: return visitSUBO(N);
1845 case ISD::ADDE: return visitADDE(N);
1846 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1847 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1848 case ISD::SUBE: return visitSUBE(N);
1849 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1850 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1851 case ISD::SMULFIX:
1852 case ISD::SMULFIXSAT:
1853 case ISD::UMULFIX:
1854 case ISD::UMULFIXSAT: return visitMULFIX(N);
1855 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1856 case ISD::SDIV: return visitSDIV(N);
1857 case ISD::UDIV: return visitUDIV(N);
1858 case ISD::SREM:
1859 case ISD::UREM: return visitREM(N);
1860 case ISD::MULHU: return visitMULHU(N);
1861 case ISD::MULHS: return visitMULHS(N);
1862 case ISD::AVGFLOORS:
1863 case ISD::AVGFLOORU:
1864 case ISD::AVGCEILS:
1865 case ISD::AVGCEILU: return visitAVG(N);
1866 case ISD::ABDS:
1867 case ISD::ABDU: return visitABD(N);
1868 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1869 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1870 case ISD::SMULO:
1871 case ISD::UMULO: return visitMULO(N);
1872 case ISD::SMIN:
1873 case ISD::SMAX:
1874 case ISD::UMIN:
1875 case ISD::UMAX: return visitIMINMAX(N);
1876 case ISD::AND: return visitAND(N);
1877 case ISD::OR: return visitOR(N);
1878 case ISD::XOR: return visitXOR(N);
1879 case ISD::SHL: return visitSHL(N);
1880 case ISD::SRA: return visitSRA(N);
1881 case ISD::SRL: return visitSRL(N);
1882 case ISD::ROTR:
1883 case ISD::ROTL: return visitRotate(N);
1884 case ISD::FSHL:
1885 case ISD::FSHR: return visitFunnelShift(N);
1886 case ISD::SSHLSAT:
1887 case ISD::USHLSAT: return visitSHLSAT(N);
1888 case ISD::ABS: return visitABS(N);
1889 case ISD::BSWAP: return visitBSWAP(N);
1890 case ISD::BITREVERSE: return visitBITREVERSE(N);
1891 case ISD::CTLZ: return visitCTLZ(N);
1892 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1893 case ISD::CTTZ: return visitCTTZ(N);
1894 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1895 case ISD::CTPOP: return visitCTPOP(N);
1896 case ISD::SELECT: return visitSELECT(N);
1897 case ISD::VSELECT: return visitVSELECT(N);
1898 case ISD::SELECT_CC: return visitSELECT_CC(N);
1899 case ISD::SETCC: return visitSETCC(N);
1900 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1901 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1902 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1903 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1904 case ISD::AssertSext:
1905 case ISD::AssertZext: return visitAssertExt(N);
1906 case ISD::AssertAlign: return visitAssertAlign(N);
1907 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1908  case ISD::SIGN_EXTEND_VECTOR_INREG:
1909  case ISD::ZERO_EXTEND_VECTOR_INREG:
1910  case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1911 case ISD::TRUNCATE: return visitTRUNCATE(N);
1912 case ISD::BITCAST: return visitBITCAST(N);
1913 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1914 case ISD::FADD: return visitFADD(N);
1915 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1916 case ISD::FSUB: return visitFSUB(N);
1917 case ISD::FMUL: return visitFMUL(N);
1918 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1919 case ISD::FMAD: return visitFMAD(N);
1920 case ISD::FDIV: return visitFDIV(N);
1921 case ISD::FREM: return visitFREM(N);
1922 case ISD::FSQRT: return visitFSQRT(N);
1923 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1924 case ISD::FPOW: return visitFPOW(N);
1925 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1926 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1927 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1928 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1929 case ISD::LRINT:
1930 case ISD::LLRINT: return visitXRINT(N);
1931 case ISD::FP_ROUND: return visitFP_ROUND(N);
1932 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1933 case ISD::FNEG: return visitFNEG(N);
1934 case ISD::FABS: return visitFABS(N);
1935 case ISD::FFLOOR: return visitFFLOOR(N);
1936 case ISD::FMINNUM:
1937 case ISD::FMAXNUM:
1938 case ISD::FMINIMUM:
1939 case ISD::FMAXIMUM: return visitFMinMax(N);
1940 case ISD::FCEIL: return visitFCEIL(N);
1941 case ISD::FTRUNC: return visitFTRUNC(N);
1942 case ISD::FFREXP: return visitFFREXP(N);
1943 case ISD::BRCOND: return visitBRCOND(N);
1944 case ISD::BR_CC: return visitBR_CC(N);
1945 case ISD::LOAD: return visitLOAD(N);
1946 case ISD::STORE: return visitSTORE(N);
1947 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1948 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1949 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1950 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1951 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1952 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1953 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1954 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1955 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1956 case ISD::MGATHER: return visitMGATHER(N);
1957 case ISD::MLOAD: return visitMLOAD(N);
1958 case ISD::MSCATTER: return visitMSCATTER(N);
1959 case ISD::MSTORE: return visitMSTORE(N);
1960 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1961 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1962 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1963 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1964 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1965 case ISD::FREEZE: return visitFREEZE(N);
1966 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1967 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1968  case ISD::VECREDUCE_FADD:
1969  case ISD::VECREDUCE_FMUL:
1970  case ISD::VECREDUCE_ADD:
1971 case ISD::VECREDUCE_MUL:
1972 case ISD::VECREDUCE_AND:
1973 case ISD::VECREDUCE_OR:
1974 case ISD::VECREDUCE_XOR:
1975  case ISD::VECREDUCE_SMAX:
1976  case ISD::VECREDUCE_SMIN:
1977  case ISD::VECREDUCE_UMAX:
1978  case ISD::VECREDUCE_UMIN:
1979  case ISD::VECREDUCE_FMAX:
1980  case ISD::VECREDUCE_FMIN:
1981  case ISD::VECREDUCE_FMAXIMUM:
1982  case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1983#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1984#include "llvm/IR/VPIntrinsics.def"
1985 return visitVPOp(N);
1986 }
1987 // clang-format on
1988 return SDValue();
1989}
1990
1991SDValue DAGCombiner::combine(SDNode *N) {
1992 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1993 return SDValue();
1994
1995 SDValue RV;
1996 if (!DisableGenericCombines)
1997 RV = visit(N);
1998
1999 // If nothing happened, try a target-specific DAG combine.
2000 if (!RV.getNode()) {
2001 assert(N->getOpcode() != ISD::DELETED_NODE &&
2002 "Node was deleted but visit returned NULL!");
2003
2004 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2005 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2006
2007 // Expose the DAG combiner to the target combiner impls.
2008      TargetLowering::DAGCombinerInfo
2009        DagCombineInfo(DAG, Level, false, this);
2010
2011 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2012 }
2013 }
2014
2015 // If nothing happened still, try promoting the operation.
2016 if (!RV.getNode()) {
2017 switch (N->getOpcode()) {
2018 default: break;
2019 case ISD::ADD:
2020 case ISD::SUB:
2021 case ISD::MUL:
2022 case ISD::AND:
2023 case ISD::OR:
2024 case ISD::XOR:
2025 RV = PromoteIntBinOp(SDValue(N, 0));
2026 break;
2027 case ISD::SHL:
2028 case ISD::SRA:
2029 case ISD::SRL:
2030 RV = PromoteIntShiftOp(SDValue(N, 0));
2031 break;
2032 case ISD::SIGN_EXTEND:
2033 case ISD::ZERO_EXTEND:
2034 case ISD::ANY_EXTEND:
2035 RV = PromoteExtend(SDValue(N, 0));
2036 break;
2037 case ISD::LOAD:
2038 if (PromoteLoad(SDValue(N, 0)))
2039 RV = SDValue(N, 0);
2040 break;
2041 }
2042 }
2043
2044 // If N is a commutative binary node, try to eliminate it if the commuted
2045 // version is already present in the DAG.
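  // Hedged illustration (added for clarity): if both (add x, y) and (add y, x)
  // already exist in the DAG, the commuted-operand lookup below finds the
  // existing node so this one can simply be CSE'd into it.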
2046 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2047 SDValue N0 = N->getOperand(0);
2048 SDValue N1 = N->getOperand(1);
2049
2050 // Constant operands are canonicalized to RHS.
2051 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2052 SDValue Ops[] = {N1, N0};
2053 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2054 N->getFlags());
2055 if (CSENode)
2056 return SDValue(CSENode, 0);
2057 }
2058 }
2059
2060 return RV;
2061}
2062
2063/// Given a node, return its input chain if it has one, otherwise return a null
2064/// sd operand.
2065static SDValue getInputChainForNode(SDNode *N) {
2066  if (unsigned NumOps = N->getNumOperands()) {
2067 if (N->getOperand(0).getValueType() == MVT::Other)
2068 return N->getOperand(0);
2069 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2070 return N->getOperand(NumOps-1);
2071 for (unsigned i = 1; i < NumOps-1; ++i)
2072 if (N->getOperand(i).getValueType() == MVT::Other)
2073 return N->getOperand(i);
2074 }
2075 return SDValue();
2076}
2077
2078SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2079 // If N has two operands, where one has an input chain equal to the other,
2080 // the 'other' chain is redundant.
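  // Illustrative case (added, not in the original comment): for
  // TokenFactor(St, Ch) where store St already uses Ch as its input chain, Ch
  // is ordered before St anyway, so the token factor can be replaced by St.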
2081 if (N->getNumOperands() == 2) {
2082 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2083 return N->getOperand(0);
2084 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2085 return N->getOperand(1);
2086 }
2087
2088 // Don't simplify token factors if optnone.
2089 if (OptLevel == CodeGenOptLevel::None)
2090 return SDValue();
2091
2092 // Don't simplify the token factor if the node itself has too many operands.
2093 if (N->getNumOperands() > TokenFactorInlineLimit)
2094 return SDValue();
2095
2096 // If the sole user is a token factor, we should make sure we have a
2097 // chance to merge them together. This prevents TF chains from inhibiting
2098 // optimizations.
2099 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2100 AddToWorklist(*(N->use_begin()));
2101
2102 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2103 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2104  SmallPtrSet<SDNode*, 16> SeenOps;
2105  bool Changed = false;             // If we should replace this token factor.
2106
2107 // Start out with this token factor.
2108 TFs.push_back(N);
2109
2110  // Iterate through token factors. The TFs list grows when new token factors
2111  // are encountered.
2112 for (unsigned i = 0; i < TFs.size(); ++i) {
2113 // Limit number of nodes to inline, to avoid quadratic compile times.
2114 // We have to add the outstanding Token Factors to Ops, otherwise we might
2115 // drop Ops from the resulting Token Factors.
2116 if (Ops.size() > TokenFactorInlineLimit) {
2117 for (unsigned j = i; j < TFs.size(); j++)
2118 Ops.emplace_back(TFs[j], 0);
2119 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2120 // combiner worklist later.
2121 TFs.resize(i);
2122 break;
2123 }
2124
2125 SDNode *TF = TFs[i];
2126 // Check each of the operands.
2127 for (const SDValue &Op : TF->op_values()) {
2128 switch (Op.getOpcode()) {
2129 case ISD::EntryToken:
2130 // Entry tokens don't need to be added to the list. They are
2131 // redundant.
2132 Changed = true;
2133 break;
2134
2135 case ISD::TokenFactor:
2136 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2137 // Queue up for processing.
2138 TFs.push_back(Op.getNode());
2139 Changed = true;
2140 break;
2141 }
2142 [[fallthrough]];
2143
2144 default:
2145 // Only add if it isn't already in the list.
2146 if (SeenOps.insert(Op.getNode()).second)
2147 Ops.push_back(Op);
2148 else
2149 Changed = true;
2150 break;
2151 }
2152 }
2153 }
2154
2155 // Re-visit inlined Token Factors, to clean them up in case they have been
2156 // removed. Skip the first Token Factor, as this is the current node.
2157 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2158 AddToWorklist(TFs[i]);
2159
2160 // Remove Nodes that are chained to another node in the list. Do so
2161  // by walking up chains breadth-first, stopping when we've seen
2162 // another operand. In general we must climb to the EntryNode, but we can exit
2163 // early if we find all remaining work is associated with just one operand as
2164 // no further pruning is possible.
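  // Worked illustration (hypothetical): if Ops contains both a store S and the
  // load L that S is (transitively) chained to, the upward walk from S reaches
  // L, so L is already ordered by S's chain and can be pruned from the new
  // token factor.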
2165
2166 // List of nodes to search through and original Ops from which they originate.
2167  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2168  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2169 SmallPtrSet<SDNode *, 16> SeenChains;
2170 bool DidPruneOps = false;
2171
2172 unsigned NumLeftToConsider = 0;
2173 for (const SDValue &Op : Ops) {
2174 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2175 OpWorkCount.push_back(1);
2176 }
2177
2178 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2179    // If this is an Op, we can remove the op from the list. Re-mark any
2180    // search associated with it as being from the current OpNumber.
2181 if (SeenOps.contains(Op)) {
2182 Changed = true;
2183 DidPruneOps = true;
2184 unsigned OrigOpNumber = 0;
2185 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2186 OrigOpNumber++;
2187 assert((OrigOpNumber != Ops.size()) &&
2188 "expected to find TokenFactor Operand");
2189 // Re-mark worklist from OrigOpNumber to OpNumber
2190 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2191 if (Worklist[i].second == OrigOpNumber) {
2192 Worklist[i].second = OpNumber;
2193 }
2194 }
2195 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2196 OpWorkCount[OrigOpNumber] = 0;
2197 NumLeftToConsider--;
2198 }
2199 // Add if it's a new chain
2200 if (SeenChains.insert(Op).second) {
2201 OpWorkCount[OpNumber]++;
2202 Worklist.push_back(std::make_pair(Op, OpNumber));
2203 }
2204 };
2205
2206 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2207    // We need to consider at least 2 Ops to prune.
2208 if (NumLeftToConsider <= 1)
2209 break;
2210 auto CurNode = Worklist[i].first;
2211 auto CurOpNumber = Worklist[i].second;
2212 assert((OpWorkCount[CurOpNumber] > 0) &&
2213 "Node should not appear in worklist");
2214 switch (CurNode->getOpcode()) {
2215 case ISD::EntryToken:
2216      // Hitting EntryToken is the only way for the search to terminate without
2217      // hitting another operand's search. Prevent us from marking this operand
2218      // considered.
2220 NumLeftToConsider++;
2221 break;
2222 case ISD::TokenFactor:
2223 for (const SDValue &Op : CurNode->op_values())
2224 AddToWorklist(i, Op.getNode(), CurOpNumber);
2225 break;
2226    case ISD::LIFETIME_START:
2227    case ISD::LIFETIME_END:
2228 case ISD::CopyFromReg:
2229 case ISD::CopyToReg:
2230 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2231 break;
2232 default:
2233 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2234 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2235 break;
2236 }
2237 OpWorkCount[CurOpNumber]--;
2238 if (OpWorkCount[CurOpNumber] == 0)
2239 NumLeftToConsider--;
2240 }
2241
2242 // If we've changed things around then replace token factor.
2243 if (Changed) {
2244    SDValue Result;
2245    if (Ops.empty()) {
2246 // The entry token is the only possible outcome.
2247 Result = DAG.getEntryNode();
2248 } else {
2249 if (DidPruneOps) {
2250 SmallVector<SDValue, 8> PrunedOps;
2251 //
2252 for (const SDValue &Op : Ops) {
2253 if (SeenChains.count(Op.getNode()) == 0)
2254 PrunedOps.push_back(Op);
2255 }
2256 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2257 } else {
2258 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2259 }
2260 }
2261 return Result;
2262 }
2263 return SDValue();
2264}
2265
2266/// MERGE_VALUES can always be eliminated.
2267SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2268 WorklistRemover DeadNodes(*this);
2269 // Replacing results may cause a different MERGE_VALUES to suddenly
2270 // be CSE'd with N, and carry its uses with it. Iterate until no
2271 // uses remain, to ensure that the node can be safely deleted.
2272 // First add the users of this node to the work list so that they
2273 // can be tried again once they have new operands.
2274 AddUsersToWorklist(N);
2275 do {
2276 // Do as a single replacement to avoid rewalking use lists.
2277    SmallVector<SDValue, 8> Ops;
2278    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2279 Ops.push_back(N->getOperand(i));
2280 DAG.ReplaceAllUsesWith(N, Ops.data());
2281 } while (!N->use_empty());
2282 deleteAndRecombine(N);
2283 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2284}
2285
2286/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2287/// ConstantSDNode pointer else nullptr.
2288static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2289  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2290 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2291}
2292
2293// isTruncateOf - If N is a truncate of some other value, return true, record
2294// the value being truncated in Op and which of Op's bits are zero/one in Known.
2295// This function computes KnownBits to avoid a duplicated call to
2296// computeKnownBits in the caller.
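// Illustrative note (added for clarity): besides a plain TRUNCATE, a
// (setcc X, 0, setne) whose operand is known to have at most its low bit set
// behaves like a truncate of X to i1, which is what the code below recognizes.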
2297static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2298                         KnownBits &Known) {
2299 if (N->getOpcode() == ISD::TRUNCATE) {
2300 Op = N->getOperand(0);
2301 Known = DAG.computeKnownBits(Op);
2302 return true;
2303 }
2304
2305 if (N.getOpcode() != ISD::SETCC ||
2306 N.getValueType().getScalarType() != MVT::i1 ||
2307 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2308 return false;
2309
2310 SDValue Op0 = N->getOperand(0);
2311 SDValue Op1 = N->getOperand(1);
2312 assert(Op0.getValueType() == Op1.getValueType());
2313
2314 if (isNullOrNullSplat(Op0))
2315 Op = Op1;
2316 else if (isNullOrNullSplat(Op1))
2317 Op = Op0;
2318 else
2319 return false;
2320
2321 Known = DAG.computeKnownBits(Op);
2322
2323 return (Known.Zero | 1).isAllOnes();
2324}
2325
2326/// Return true if 'Use' is a load or a store that uses N as its base pointer
2327/// and that N may be folded in the load / store addressing mode.
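/// Illustrative example (assumed for clarity, not part of the original text):
/// if N is (add %base, 16) and Use is an unindexed load whose base pointer is
/// N, this asks the target whether a [reg + 16] addressing mode is legal for
/// the load's memory type; if so, the add may be folded into the access.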
2328static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2329                                    const TargetLowering &TLI) {
2330 EVT VT;
2331 unsigned AS;
2332
2333 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2334 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2335 return false;
2336 VT = LD->getMemoryVT();
2337 AS = LD->getAddressSpace();
2338 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2339 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2340 return false;
2341 VT = ST->getMemoryVT();
2342 AS = ST->getAddressSpace();
2343 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2344 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2345 return false;
2346 VT = LD->getMemoryVT();
2347 AS = LD->getAddressSpace();
2348 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2349 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2350 return false;
2351 VT = ST->getMemoryVT();
2352 AS = ST->getAddressSpace();
2353 } else {
2354 return false;
2355 }
2356
2357  TargetLowering::AddrMode AM;
2358  if (N->getOpcode() == ISD::ADD) {
2359 AM.HasBaseReg = true;
2360 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2361 if (Offset)
2362 // [reg +/- imm]
2363 AM.BaseOffs = Offset->getSExtValue();
2364 else
2365 // [reg +/- reg]
2366 AM.Scale = 1;
2367 } else if (N->getOpcode() == ISD::SUB) {
2368 AM.HasBaseReg = true;
2369 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2370 if (Offset)
2371 // [reg +/- imm]
2372 AM.BaseOffs = -Offset->getSExtValue();
2373 else
2374 // [reg +/- reg]
2375 AM.Scale = 1;
2376 } else {
2377 return false;
2378 }
2379
2380 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2381 VT.getTypeForEVT(*DAG.getContext()), AS);
2382}
2383
2384/// This inverts a canonicalization in IR that replaces a variable select arm
2385/// with an identity constant. Codegen improves if we re-use the variable
2386/// operand rather than load a constant. This can also be converted into a
2387/// masked vector operation if the target supports it.
2388static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2389                                              bool ShouldCommuteOperands) {
2390 // Match a select as operand 1. The identity constant that we are looking for
2391 // is only valid as operand 1 of a non-commutative binop.
2392 SDValue N0 = N->getOperand(0);
2393 SDValue N1 = N->getOperand(1);
2394 if (ShouldCommuteOperands)
2395 std::swap(N0, N1);
2396
2397 // TODO: Should this apply to scalar select too?
2398 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2399 return SDValue();
2400
2401 // We can't hoist all instructions because of immediate UB (not speculatable).
2402 // For example div/rem by zero.
2404 return SDValue();
2405
2406 unsigned Opcode = N->getOpcode();
2407 EVT VT = N->getValueType(0);
2408 SDValue Cond = N1.getOperand(0);
2409 SDValue TVal = N1.getOperand(1);
2410 SDValue FVal = N1.getOperand(2);
2411
2412 // This transform increases uses of N0, so freeze it to be safe.
2413 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2414 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2415 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2416 SDValue F0 = DAG.getFreeze(N0);
2417 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2418 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2419 }
2420 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2421 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2422 SDValue F0 = DAG.getFreeze(N0);
2423 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2424 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2425 }
2426
2427 return SDValue();
2428}
2429
2430SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2431 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2432 "Unexpected binary operator");
2433
2434 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2435 auto BinOpcode = BO->getOpcode();
2436 EVT VT = BO->getValueType(0);
2437 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2438 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2439 return Sel;
2440
2441 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2442 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2443 return Sel;
2444 }
2445
2446 // Don't do this unless the old select is going away. We want to eliminate the
2447 // binary operator, not replace a binop with a select.
2448 // TODO: Handle ISD::SELECT_CC.
2449 unsigned SelOpNo = 0;
2450 SDValue Sel = BO->getOperand(0);
2451 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2452 SelOpNo = 1;
2453 Sel = BO->getOperand(1);
2454
2455 // Peek through trunc to shift amount type.
2456 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2457 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2458 // This is valid when the truncated bits of x are already zero.
2459 SDValue Op;
2460 KnownBits Known;
2461 if (isTruncateOf(DAG, Sel, Op, Known) &&
2463 Sel = Op;
2464 }
2465 }
2466
2467 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2468 return SDValue();
2469
2470 SDValue CT = Sel.getOperand(1);
2471 if (!isConstantOrConstantVector(CT, true) &&
2472      !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2473    return SDValue();
2474
2475 SDValue CF = Sel.getOperand(2);
2476 if (!isConstantOrConstantVector(CF, true) &&
2477      !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2478    return SDValue();
2479
2480 // Bail out if any constants are opaque because we can't constant fold those.
2481 // The exception is "and" and "or" with either 0 or -1 in which case we can
2482 // propagate non constant operands into select. I.e.:
2483 // and (select Cond, 0, -1), X --> select Cond, 0, X
2484 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2485 bool CanFoldNonConst =
2486 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2487      ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2488       (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2489
2490 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2491 if (!CanFoldNonConst &&
2492 !isConstantOrConstantVector(CBO, true) &&
2493      !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2494    return SDValue();
2495
2496 SDLoc DL(Sel);
2497 SDValue NewCT, NewCF;
2498
2499 if (CanFoldNonConst) {
2500 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2501 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2502 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2503 NewCT = CT;
2504 else
2505 NewCT = CBO;
2506
2507 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2508 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2509 NewCF = CF;
2510 else
2511 NewCF = CBO;
2512 } else {
2513 // We have a select-of-constants followed by a binary operator with a
2514 // constant. Eliminate the binop by pulling the constant math into the
2515 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2516 // CBO, CF + CBO
2517 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2518 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2519 if (!NewCT)
2520 return SDValue();
2521
2522 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2523 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2524 if (!NewCF)
2525 return SDValue();
2526 }
2527
2528 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2529 SelectOp->setFlags(BO->getFlags());
2530 return SelectOp;
2531}
2532
2533static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2534                                         SelectionDAG &DAG) {
2535 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2536 "Expecting add or sub");
2537
2538 // Match a constant operand and a zext operand for the math instruction:
2539 // add Z, C
2540 // sub C, Z
2541 bool IsAdd = N->getOpcode() == ISD::ADD;
2542 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2543 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2544 auto *CN = dyn_cast<ConstantSDNode>(C);
2545 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2546 return SDValue();
2547
2548 // Match the zext operand as a setcc of a boolean.
2549 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2550 Z.getOperand(0).getValueType() != MVT::i1)
2551 return SDValue();
2552
2553 // Match the compare as: setcc (X & 1), 0, eq.
2554 SDValue SetCC = Z.getOperand(0);
2555 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2556 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2557 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2558 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2559 return SDValue();
2560
2561 // We are adding/subtracting a constant and an inverted low bit. Turn that
2562 // into a subtract/add of the low bit with incremented/decremented constant:
2563 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2564 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2565 EVT VT = C.getValueType();
2566 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2567 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2568 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2569 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2570}
2571
2572// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2573SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2574 SDValue N0 = N->getOperand(0);
2575 EVT VT = N0.getValueType();
2576 SDValue A, B;
2577
2578 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2579      sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2580                        m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2581                              m_SpecificInt(1))))) {
2582 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2583 }
2584 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2585      sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2586                        m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2587                              m_SpecificInt(1))))) {
2588 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2589 }
2590 return SDValue();
2591}
2592
2593/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2594/// a shift and add with a different constant.
2595static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2596                                   SelectionDAG &DAG) {
2597 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2598 "Expecting add or sub");
2599
2600 // We need a constant operand for the add/sub, and the other operand is a
2601 // logical shift right: add (srl), C or sub C, (srl).
2602 bool IsAdd = N->getOpcode() == ISD::ADD;
2603 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2604 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2605 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2606 ShiftOp.getOpcode() != ISD::SRL)
2607 return SDValue();
2608
2609 // The shift must be of a 'not' value.
2610 SDValue Not = ShiftOp.getOperand(0);
2611 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2612 return SDValue();
2613
2614 // The shift must be moving the sign bit to the least-significant-bit.
2615 EVT VT = ShiftOp.getValueType();
2616 SDValue ShAmt = ShiftOp.getOperand(1);
2617 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2618 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2619 return SDValue();
2620
2621 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2622 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2623 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2624 if (SDValue NewC = DAG.FoldConstantArithmetic(
2625 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2626 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2627 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2628 Not.getOperand(0), ShAmt);
2629 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2630 }
2631
2632 return SDValue();
2633}
2634
2635static bool
2636areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2637  return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2638 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2639}
2640
2641/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2642/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2643/// are no common bits set in the operands).
2644SDValue DAGCombiner::visitADDLike(SDNode *N) {
2645 SDValue N0 = N->getOperand(0);
2646 SDValue N1 = N->getOperand(1);
2647 EVT VT = N0.getValueType();
2648 SDLoc DL(N);
2649
2650 // fold (add x, undef) -> undef
2651 if (N0.isUndef())
2652 return N0;
2653 if (N1.isUndef())
2654 return N1;
2655
2656 // fold (add c1, c2) -> c1+c2
2657 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2658 return C;
2659
2660 // canonicalize constant to RHS
2661  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2662      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2663    return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2664
2665 if (areBitwiseNotOfEachother(N0, N1))
2666    return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2667                           SDLoc(N), VT);
2668
2669 // fold vector ops
2670 if (VT.isVector()) {
2671 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2672 return FoldedVOp;
2673
2674 // fold (add x, 0) -> x, vector edition
2675    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2676      return N0;
2677 }
2678
2679 // fold (add x, 0) -> x
2680 if (isNullConstant(N1))
2681 return N0;
2682
2683 if (N0.getOpcode() == ISD::SUB) {
2684 SDValue N00 = N0.getOperand(0);
2685 SDValue N01 = N0.getOperand(1);
2686
2687 // fold ((A-c1)+c2) -> (A+(c2-c1))
2688 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2689 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2690
2691 // fold ((c1-A)+c2) -> (c1+c2)-A
2692 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2693 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2694 }
2695
2696 // add (sext i1 X), 1 -> zext (not i1 X)
2697 // We don't transform this pattern:
2698 // add (zext i1 X), -1 -> sext (not i1 X)
2699 // because most (?) targets generate better code for the zext form.
2700 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2701 isOneOrOneSplat(N1)) {
2702 SDValue X = N0.getOperand(0);
2703 if ((!LegalOperations ||
2704 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2705          TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2706        X.getScalarValueSizeInBits() == 1) {
2707 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2708 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2709 }
2710 }
2711
2712 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2713 // iff (or x, c0) is equivalent to (add x, c0).
2714 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2715 // iff (xor x, c0) is equivalent to (add x, c0).
2716 if (DAG.isADDLike(N0)) {
2717 SDValue N01 = N0.getOperand(1);
2718 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2719 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2720 }
2721
2722 if (SDValue NewSel = foldBinOpIntoSelect(N))
2723 return NewSel;
2724
2725 // reassociate add
2726 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2727 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2728 return RADD;
2729
2730 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2731 // equivalent to (add x, c).
2732 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2733 // equivalent to (add x, c).
2734 // Do this optimization only when adding c does not introduce instructions
2735 // for adding carries.
2736 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2737 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2738 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2739 // If N0's type does not split or is a sign mask, it does not introduce
2740 // add carry.
2741 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2742 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2743                          TyActn == TargetLoweringBase::TypePromoteInteger ||
2744                          isMinSignedConstant(N0.getOperand(1));
2745        if (NoAddCarry)
2746 return DAG.getNode(
2747 ISD::ADD, DL, VT,
2748 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2749 N0.getOperand(1));
2750 }
2751 return SDValue();
2752 };
2753 if (SDValue Add = ReassociateAddOr(N0, N1))
2754 return Add;
2755 if (SDValue Add = ReassociateAddOr(N1, N0))
2756 return Add;
2757
2758 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2759 if (SDValue SD =
2760 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2761 return SD;
2762 }
2763
2764 SDValue A, B, C;
2765
2766 // fold ((0-A) + B) -> B-A
2767 if (sd_match(N0, m_Neg(m_Value(A))))
2768 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2769
2770 // fold (A + (0-B)) -> A-B
2771 if (sd_match(N1, m_Neg(m_Value(B))))
2772 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2773
2774 // fold (A+(B-A)) -> B
2775 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2776 return B;
2777
2778 // fold ((B-A)+A) -> B
2779 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2780 return B;
2781
2782 // fold ((A-B)+(C-A)) -> (C-B)
2783 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2785 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2786
2787 // fold ((A-B)+(B-C)) -> (A-C)
2788 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2790 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2791
2792 // fold (A+(B-(A+C))) to (B-C)
2793 // fold (A+(B-(C+A))) to (B-C)
2794 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2795 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2796
2797 // fold (A+((B-A)+or-C)) to (B+or-C)
2798 if (sd_match(N1,
2800 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2801 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2802
2803 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2804 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2805 N0->hasOneUse() && N1->hasOneUse()) {
2806 SDValue N00 = N0.getOperand(0);
2807 SDValue N01 = N0.getOperand(1);
2808 SDValue N10 = N1.getOperand(0);
2809 SDValue N11 = N1.getOperand(1);
2810
2811    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2812      return DAG.getNode(ISD::SUB, DL, VT,
2813 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2814 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2815 }
2816
2817 // fold (add (umax X, C), -C) --> (usubsat X, C)
2818 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2819 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2820 return (!Max && !Op) ||
2821 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2822 };
2823 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2824 /*AllowUndefs*/ true))
2825 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2826 N0.getOperand(1));
2827 }
2828
2829  if (SimplifyDemandedBits(SDValue(N, 0)))
2830    return SDValue(N, 0);
2831
2832 if (isOneOrOneSplat(N1)) {
2833 // fold (add (xor a, -1), 1) -> (sub 0, a)
2834 if (isBitwiseNot(N0))
2835 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2836 N0.getOperand(0));
2837
2838 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2839 if (N0.getOpcode() == ISD::ADD) {
2840 SDValue A, Xor;
2841
2842 if (isBitwiseNot(N0.getOperand(0))) {
2843 A = N0.getOperand(1);
2844 Xor = N0.getOperand(0);
2845 } else if (isBitwiseNot(N0.getOperand(1))) {
2846 A = N0.getOperand(0);
2847 Xor = N0.getOperand(1);
2848 }
2849
2850 if (Xor)
2851 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2852 }
2853
2854 // Look for:
2855 // add (add x, y), 1
2856 // And if the target does not like this form then turn into:
2857 // sub y, (xor x, -1)
2858 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2859 N0.hasOneUse() &&
2860 // Limit this to after legalization if the add has wrap flags
2861 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2862 !N->getFlags().hasNoSignedWrap()))) {
2863 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2864 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2865 }
2866 }
2867
2868 // (x - y) + -1 -> add (xor y, -1), x
2869 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2870 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2871 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2872 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2873 }
2874
2875 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2876 // This can help if the inner add has multiple uses.
2877 APInt CM, CA;
2878 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2879 if (VT.getScalarSizeInBits() <= 64) {
2880      if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2881                                      m_ConstInt(CM)))) &&
2882          TLI.isLegalAddImmediate(
2883              (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2884        SDNodeFlags Flags;
2885        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2886        // are _also_ nsw, the outputs can be too.
2887 if (N->getFlags().hasNoUnsignedWrap() &&
2888 N0->getFlags().hasNoUnsignedWrap() &&
2889            N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2890          Flags.setNoUnsignedWrap(true);
2891 if (N->getFlags().hasNoSignedWrap() &&
2892 N0->getFlags().hasNoSignedWrap() &&
2893              N0.getOperand(0)->getFlags().hasNoSignedWrap())
2894            Flags.setNoSignedWrap(true);
2895 }
2896 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2897 DAG.getConstant(CM, DL, VT), Flags);
2898 return DAG.getNode(
2899 ISD::ADD, DL, VT, Mul,
2900 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2901 }
2902 // Also look in case there is an intermediate add.
2903 if (sd_match(N0, m_OneUse(m_Add(
2904                       m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2905                                      m_ConstInt(CM))),
2906 m_Value(B)))) &&
2907          TLI.isLegalAddImmediate(
2908              (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2909        SDNodeFlags Flags;
2910        // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2911        // are _also_ nsw, the outputs can be too.
2912 SDValue OMul =
2913 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2914 if (N->getFlags().hasNoUnsignedWrap() &&
2915 N0->getFlags().hasNoUnsignedWrap() &&
2916 OMul->getFlags().hasNoUnsignedWrap() &&
2917 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2918 Flags.setNoUnsignedWrap(true);
2919 if (N->getFlags().hasNoSignedWrap() &&
2920 N0->getFlags().hasNoSignedWrap() &&
2921 OMul->getFlags().hasNoSignedWrap() &&
2922 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2923 Flags.setNoSignedWrap(true);
2924 }
2925 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2926 DAG.getConstant(CM, DL, VT), Flags);
2927 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2928 return DAG.getNode(
2929 ISD::ADD, DL, VT, Add,
2930 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2931 }
2932 }
2933 }
2934
2935 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2936 return Combined;
2937
2938 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2939 return Combined;
2940
2941 return SDValue();
2942}
2943
2944// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2945SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2946 SDValue N0 = N->getOperand(0);
2947 EVT VT = N0.getValueType();
2948 SDValue A, B;
2949
2950 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2951      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2952                        m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2953                              m_SpecificInt(1))))) {
2954 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2955 }
2956 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2957      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2958                        m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2959                              m_SpecificInt(1))))) {
2960 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2961 }
2962
2963 return SDValue();
2964}
2965
2966SDValue DAGCombiner::visitADD(SDNode *N) {
2967 SDValue N0 = N->getOperand(0);
2968 SDValue N1 = N->getOperand(1);
2969 EVT VT = N0.getValueType();
2970 SDLoc DL(N);
2971
2972 if (SDValue Combined = visitADDLike(N))
2973 return Combined;
2974
2975 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2976 return V;
2977
2978 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2979 return V;
2980
2981 // Try to match AVGFLOOR fixedwidth pattern
2982 if (SDValue V = foldAddToAvg(N, DL))
2983 return V;
2984
2985 // fold (a+b) -> (a|b) iff a and b share no bits.
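  // Hedged illustration (not from the source comments): (add (shl x, 8),
  // (and y, 255)) writes disjoint bit ranges, so it can be emitted as an OR
  // marked disjoint instead.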
2986 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2987 DAG.haveNoCommonBitsSet(N0, N1)) {
2988    SDNodeFlags Flags;
2989    Flags.setDisjoint(true);
2990 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2991 }
2992
2993 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2994 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2995 const APInt &C0 = N0->getConstantOperandAPInt(0);
2996 const APInt &C1 = N1->getConstantOperandAPInt(0);
2997 return DAG.getVScale(DL, VT, C0 + C1);
2998 }
2999
3000 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3001 if (N0.getOpcode() == ISD::ADD &&
3002 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3003 N1.getOpcode() == ISD::VSCALE) {
3004 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3005 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3006 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3007 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3008 }
3009
3010 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3011 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3012 N1.getOpcode() == ISD::STEP_VECTOR) {
3013 const APInt &C0 = N0->getConstantOperandAPInt(0);
3014 const APInt &C1 = N1->getConstantOperandAPInt(0);
3015 APInt NewStep = C0 + C1;
3016 return DAG.getStepVector(DL, VT, NewStep);
3017 }
3018
3019 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3020 if (N0.getOpcode() == ISD::ADD &&
3021      N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3022      N1.getOpcode() == ISD::STEP_VECTOR) {
3023 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3024 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3025 APInt NewStep = SV0 + SV1;
3026 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3027 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3028 }
3029
3030 return SDValue();
3031}
3032
3033SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3034 unsigned Opcode = N->getOpcode();
3035 SDValue N0 = N->getOperand(0);
3036 SDValue N1 = N->getOperand(1);
3037 EVT VT = N0.getValueType();
3038 bool IsSigned = Opcode == ISD::SADDSAT;
3039 SDLoc DL(N);
3040
3041 // fold (add_sat x, undef) -> -1
3042 if (N0.isUndef() || N1.isUndef())
3043 return DAG.getAllOnesConstant(DL, VT);
3044
3045 // fold (add_sat c1, c2) -> c3
3046 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3047 return C;
3048
3049 // canonicalize constant to RHS
3050  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3051      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3052    return DAG.getNode(Opcode, DL, VT, N1, N0);
3053
3054 // fold vector ops
3055 if (VT.isVector()) {
3056 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3057 return FoldedVOp;
3058
3059 // fold (add_sat x, 0) -> x, vector edition
3060    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3061      return N0;
3062 }
3063
3064 // fold (add_sat x, 0) -> x
3065 if (isNullConstant(N1))
3066 return N0;
3067
3068 // If it cannot overflow, transform into an add.
3069 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3070 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3071
3072 return SDValue();
3073}
3074
3075static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3076                          bool ForceCarryReconstruction = false) {
3077 bool Masked = false;
3078
3079 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3080 while (true) {
3081 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3082 V = V.getOperand(0);
3083 continue;
3084 }
3085
3086 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3087 if (ForceCarryReconstruction)
3088 return V;
3089
3090 Masked = true;
3091 V = V.getOperand(0);
3092 continue;
3093 }
3094
3095 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3096 return V;
3097
3098 break;
3099 }
3100
3101 // If this is not a carry, return.
3102 if (V.getResNo() != 1)
3103 return SDValue();
3104
3105 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3106 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3107 return SDValue();
3108
3109 EVT VT = V->getValueType(0);
3110 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3111 return SDValue();
3112
3113 // If the result is masked, then no matter what kind of bool it is we can
3114 // return. If it isn't, then we need to make sure the bool type is either 0 or
3115 // 1 and not other values.
3116 if (Masked ||
3117 TLI.getBooleanContents(V.getValueType()) ==
3118          TargetLowering::ZeroOrOneBooleanContent)
3119    return V;
3120
3121 return SDValue();
3122}
3123
3124/// Given the operands of an add/sub operation, see if the 2nd operand is a
3125/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3126/// the opcode and bypass the mask operation.
3127static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3128 SelectionDAG &DAG, const SDLoc &DL) {
3129 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3130 N1 = N1.getOperand(0);
3131
3132 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3133 return SDValue();
3134
3135 EVT VT = N0.getValueType();
3136 SDValue N10 = N1.getOperand(0);
3137 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3138 N10 = N10.getOperand(0);
3139
3140 if (N10.getValueType() != VT)
3141 return SDValue();
3142
3143 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3144 return SDValue();
3145
3146 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3147 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3148 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3149}
3150
3151/// Helper for doing combines based on N0 and N1 being added to each other.
3152SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3153 SDNode *LocReference) {
3154 EVT VT = N0.getValueType();
3155 SDLoc DL(LocReference);
3156
3157 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3158 SDValue Y, N;
3159 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3160 return DAG.getNode(ISD::SUB, DL, VT, N0,
3161 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3162
3163 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3164 return V;
3165
3166 // Look for:
3167 // add (add x, 1), y
3168 // And if the target does not like this form then turn into:
3169 // sub y, (xor x, -1)
3170 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3171 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3172 // Limit this to after legalization if the add has wrap flags
3173 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3174 !N0->getFlags().hasNoSignedWrap()))) {
3175 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3176 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3177 }
3178
3179 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3180 // Hoist one-use subtraction by non-opaque constant:
3181 // (x - C) + y -> (x + y) - C
3182 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3183 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3184 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3185 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3186 }
3187 // Hoist one-use subtraction from non-opaque constant:
3188 // (C - x) + y -> (y - x) + C
3189 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3190 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3191 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3192 }
3193 }
3194
3195 // add (mul x, C), x -> mul x, C+1
3196 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3197 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3198 N0.hasOneUse()) {
3199 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3200 DAG.getConstant(1, DL, VT));
3201 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3202 }
3203
3204 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3205 // rather than 'add 0/-1' (the zext should get folded).
3206 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3207 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3208 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3209 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3210 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3211 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3212 }
3213
3214 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3215 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3216 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3217 if (TN->getVT() == MVT::i1) {
3218 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3219 DAG.getConstant(1, DL, VT));
3220 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3221 }
3222 }
3223
3224 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3225 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3226 N1.getResNo() == 0)
3227 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3228 N0, N1.getOperand(0), N1.getOperand(2));
3229
3230 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3231 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3232 if (SDValue Carry = getAsCarry(TLI, N1))
3233 return DAG.getNode(ISD::UADDO_CARRY, DL,
3234 DAG.getVTList(VT, Carry.getValueType()), N0,
3235 DAG.getConstant(0, DL, VT), Carry);
3236
3237 return SDValue();
3238}
3239
3240SDValue DAGCombiner::visitADDC(SDNode *N) {
3241 SDValue N0 = N->getOperand(0);
3242 SDValue N1 = N->getOperand(1);
3243 EVT VT = N0.getValueType();
3244 SDLoc DL(N);
3245
3246 // If the flag result is dead, turn this into an ADD.
3247 if (!N->hasAnyUseOfValue(1))
3248 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3249 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3250
3251 // canonicalize constant to RHS.
3252 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3253 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3254 if (N0C && !N1C)
3255 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3256
3257 // fold (addc x, 0) -> x + no carry out
3258 if (isNullConstant(N1))
3259 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3260 DL, MVT::Glue));
3261
3262 // If it cannot overflow, transform into an add.
3263 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3264 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3265 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3266
3267 return SDValue();
3268}
3269
3270/**
3271 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3272 * then the flip also occurs if computing the inverse is the same cost.
3273 * This function returns an empty SDValue in case it cannot flip the boolean
3274 * without increasing the cost of the computation. If you want to flip a boolean
3275 * no matter what, use DAG.getLogicalNOT.
3276 */
3277 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3278 const TargetLowering &TLI,
3279 bool Force) {
3280 if (Force && isa<ConstantSDNode>(V))
3281 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3282
3283 if (V.getOpcode() != ISD::XOR)
3284 return SDValue();
3285
3286 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3287 if (!Const)
3288 return SDValue();
3289
3290 EVT VT = V.getValueType();
3291
3292 bool IsFlip = false;
3293 switch(TLI.getBooleanContents(VT)) {
3294 case TargetLowering::ZeroOrOneBooleanContent:
3295 IsFlip = Const->isOne();
3296 break;
3297 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3298 IsFlip = Const->isAllOnes();
3299 break;
3300 case TargetLowering::UndefinedBooleanContent:
3301 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3302 break;
3303 }
3304
3305 if (IsFlip)
3306 return V.getOperand(0);
3307 if (Force)
3308 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3309 return SDValue();
3310}
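// NOTE (illustrative, not from the original source): concretely, for a target
// with ZeroOrOneBooleanContent the helper above treats V = (xor X, 1) as a
// flip and returns X; with ZeroOrNegativeOneBooleanContent it does the same
// for V = (xor X, -1). With Force set, a constant V, or an (xor X, C) whose C
// is not the flip constant, is wrapped in DAG.getLogicalNOT instead of
// returning an empty SDValue.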
3311
3312SDValue DAGCombiner::visitADDO(SDNode *N) {
3313 SDValue N0 = N->getOperand(0);
3314 SDValue N1 = N->getOperand(1);
3315 EVT VT = N0.getValueType();
3316 bool IsSigned = (ISD::SADDO == N->getOpcode());
3317
3318 EVT CarryVT = N->getValueType(1);
3319 SDLoc DL(N);
3320
3321 // If the flag result is dead, turn this into an ADD.
3322 if (!N->hasAnyUseOfValue(1))
3323 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3324 DAG.getUNDEF(CarryVT));
3325
3326 // canonicalize constant to RHS.
3327 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3328 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3329 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3330
3331 // fold (addo x, 0) -> x + no carry out
3332 if (isNullOrNullSplat(N1))
3333 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3334
3335 // If it cannot overflow, transform into an add.
3336 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3337 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3338 DAG.getConstant(0, DL, CarryVT));
3339
3340 if (IsSigned) {
3341 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3342 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3343 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3344 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3345 } else {
3346 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3347 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3348 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3349 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3350 return CombineTo(
3351 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3352 }
3353
3354 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3355 return Combined;
3356
3357 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3358 return Combined;
3359 }
3360
3361 return SDValue();
3362}
3363
3364SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3365 EVT VT = N0.getValueType();
3366 if (VT.isVector())
3367 return SDValue();
3368
3369 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3370 // If Y + 1 cannot overflow.
3371 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3372 SDValue Y = N1.getOperand(0);
3373 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3374 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3375 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3376 N1.getOperand(2));
3377 }
3378
3379 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3380 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3381 if (SDValue Carry = getAsCarry(TLI, N1))
3382 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3383 DAG.getConstant(0, SDLoc(N), VT), Carry);
3384
3385 return SDValue();
3386}
3387
3388SDValue DAGCombiner::visitADDE(SDNode *N) {
3389 SDValue N0 = N->getOperand(0);
3390 SDValue N1 = N->getOperand(1);
3391 SDValue CarryIn = N->getOperand(2);
3392
3393 // canonicalize constant to RHS
3394 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3395 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3396 if (N0C && !N1C)
3397 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3398 N1, N0, CarryIn);
3399
3400 // fold (adde x, y, false) -> (addc x, y)
3401 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3402 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3403
3404 return SDValue();
3405}
3406
3407SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3408 SDValue N0 = N->getOperand(0);
3409 SDValue N1 = N->getOperand(1);
3410 SDValue CarryIn = N->getOperand(2);
3411 SDLoc DL(N);
3412
3413 // canonicalize constant to RHS
3414 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3415 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3416 if (N0C && !N1C)
3417 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3418
3419 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3420 if (isNullConstant(CarryIn)) {
3421 if (!LegalOperations ||
3422 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3423 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3424 }
3425
3426 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3427 if (isNullConstant(N0) && isNullConstant(N1)) {
3428 EVT VT = N0.getValueType();
3429 EVT CarryVT = CarryIn.getValueType();
3430 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3431 AddToWorklist(CarryExt.getNode());
3432 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3433 DAG.getConstant(1, DL, VT)),
3434 DAG.getConstant(0, DL, CarryVT));
3435 }
3436
3437 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3438 return Combined;
3439
3440 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3441 return Combined;
3442
3443 // We want to avoid useless duplication.
3444 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3445 // not a binary operation, it is not really possible to leverage this
3446 // existing mechanism for it. However, if more operations require the same
3447 // deduplication logic, then it may be worth generalizing it.
3448 SDValue Ops[] = {N1, N0, CarryIn};
3449 SDNode *CSENode =
3450 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3451 if (CSENode)
3452 return SDValue(CSENode, 0);
3453
3454 return SDValue();
3455}
3456
3457/**
3458 * If we are facing some sort of diamond carry propagation pattern try to
3459 * break it up to generate something like:
3460 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3461 *
3462 * The end result is usually an increase in the number of operations required, but because the
3463 * carry is now linearized, other transforms can kick in and optimize the DAG.
3464 *
3465 * Patterns typically look something like
3466 * (uaddo A, B)
3467 * / \
3468 * Carry Sum
3469 * | \
3470 * | (uaddo_carry *, 0, Z)
3471 * | /
3472 * \ Carry
3473 * | /
3474 * (uaddo_carry X, *, *)
3475 *
3476 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3477 * produce a combine with a single path for carry propagation.
3478 */
3479 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3480 SelectionDAG &DAG, SDValue X,
3481 SDValue Carry0, SDValue Carry1,
3482 SDNode *N) {
3483 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3484 return SDValue();
3485 if (Carry1.getOpcode() != ISD::UADDO)
3486 return SDValue();
3487
3488 SDValue Z;
3489
3490 /**
3491 * First look for a suitable Z. It will present itself in the form of
3492 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3493 */
3494 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3495 isNullConstant(Carry0.getOperand(1))) {
3496 Z = Carry0.getOperand(2);
3497 } else if (Carry0.getOpcode() == ISD::UADDO &&
3498 isOneConstant(Carry0.getOperand(1))) {
3499 EVT VT = Carry0->getValueType(1);
3500 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3501 } else {
3502 // We couldn't find a suitable Z.
3503 return SDValue();
3504 }
3505
3506
3507 auto cancelDiamond = [&](SDValue A,SDValue B) {
3508 SDLoc DL(N);
3509 SDValue NewY =
3510 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3511 Combiner.AddToWorklist(NewY.getNode());
3512 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3513 DAG.getConstant(0, DL, X.getValueType()),
3514 NewY.getValue(1));
3515 };
3516
3517 /**
3518 * (uaddo A, B)
3519 * |
3520 * Sum
3521 * |
3522 * (uaddo_carry *, 0, Z)
3523 */
3524 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3525 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3526 }
3527
3528 /**
3529 * (uaddo_carry A, 0, Z)
3530 * |
3531 * Sum
3532 * |
3533 * (uaddo *, B)
3534 */
3535 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3536 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3537 }
3538
3539 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3540 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3541 }
3542
3543 return SDValue();
3544}
3545
3546// If we are facing some sort of diamond carry/borrow in/out pattern try to
3547// match patterns like:
3548//
3549// (uaddo A, B) CarryIn
3550// | \ |
3551// | \ |
3552// PartialSum PartialCarryOutX /
3553// | | /
3554// | ____|____________/
3555// | / |
3556// (uaddo *, *) \________
3557// | \ \
3558// | \ |
3559// | PartialCarryOutY |
3560// | \ |
3561// | \ /
3562// AddCarrySum | ______/
3563// | /
3564// CarryOut = (or *, *)
3565//
3566// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3567//
3568// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3569//
3570// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3571// with a single path for carry/borrow out propagation.
3572 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3573 SDValue N0, SDValue N1, SDNode *N) {
3574 SDValue Carry0 = getAsCarry(TLI, N0);
3575 if (!Carry0)
3576 return SDValue();
3577 SDValue Carry1 = getAsCarry(TLI, N1);
3578 if (!Carry1)
3579 return SDValue();
3580
3581 unsigned Opcode = Carry0.getOpcode();
3582 if (Opcode != Carry1.getOpcode())
3583 return SDValue();
3584 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3585 return SDValue();
3586 // Guarantee identical type of CarryOut
3587 EVT CarryOutType = N->getValueType(0);
3588 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3589 CarryOutType != Carry1.getValue(1).getValueType())
3590 return SDValue();
3591
3592 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3593 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3594 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3595 std::swap(Carry0, Carry1);
3596
3597 // Check if nodes are connected in expected way.
3598 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3599 Carry1.getOperand(1) != Carry0.getValue(0))
3600 return SDValue();
3601
3602 // The carry-in value must be on the right-hand side for subtraction.
3603 unsigned CarryInOperandNum =
3604 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3605 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3606 return SDValue();
3607 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3608
3609 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3610 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3611 return SDValue();
3612
3613 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3614 CarryIn = getAsCarry(TLI, CarryIn, true);
3615 if (!CarryIn)
3616 return SDValue();
3617
3618 SDLoc DL(N);
3619 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3620 Carry1->getValueType(0));
3621 SDValue Merged =
3622 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3623 Carry0.getOperand(1), CarryIn);
3624
3625 // Please note that because we have proven that the result of the UADDO/USUBO
3626 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3627 // therefore prove that if the first UADDO/USUBO overflows, the second
3628 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3629 // maximum value.
3630 //
3631 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3632 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3633 //
3634 // This is important because it means that OR and XOR can be used to merge
3635 // carry flags; and that AND can return a constant zero.
3636 //
3637 // TODO: match other operations that can merge flags (ADD, etc)
3638 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3639 if (N->getOpcode() == ISD::AND)
3640 return DAG.getConstant(0, DL, CarryOutType);
3641 return Merged.getValue(1);
3642}
3643
3644SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3645 SDValue CarryIn, SDNode *N) {
3646 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3647 // carry.
3648 if (isBitwiseNot(N0))
3649 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3650 SDLoc DL(N);
3651 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3652 N0.getOperand(0), NotC);
3653 return CombineTo(
3654 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3655 }
3656
3657 // Iff the flag result is dead:
3658 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3659 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3660 // or the dependency between the instructions.
3661 if ((N0.getOpcode() == ISD::ADD ||
3662 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3663 N0.getValue(1) != CarryIn)) &&
3664 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3665 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3666 N0.getOperand(0), N0.getOperand(1), CarryIn);
3667
3668 /**
3669 * When one of the uaddo_carry argument is itself a carry, we may be facing
3670 * a diamond carry propagation. In which case we try to transform the DAG
3671 * to ensure linear carry propagation if that is possible.
3672 */
3673 if (auto Y = getAsCarry(TLI, N1)) {
3674 // Because both are carries, Y and Z can be swapped.
3675 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3676 return R;
3677 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3678 return R;
3679 }
3680
3681 return SDValue();
3682}
3683
3684SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3685 SDValue CarryIn, SDNode *N) {
3686 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3687 if (isBitwiseNot(N0)) {
3688 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3689 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3690 N0.getOperand(0), NotC);
3691 }
3692
3693 return SDValue();
3694}
3695
3696SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3697 SDValue N0 = N->getOperand(0);
3698 SDValue N1 = N->getOperand(1);
3699 SDValue CarryIn = N->getOperand(2);
3700 SDLoc DL(N);
3701
3702 // canonicalize constant to RHS
3703 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3704 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3705 if (N0C && !N1C)
3706 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3707
3708 // fold (saddo_carry x, y, false) -> (saddo x, y)
3709 if (isNullConstant(CarryIn)) {
3710 if (!LegalOperations ||
3711 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3712 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3713 }
3714
3715 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3716 return Combined;
3717
3718 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3719 return Combined;
3720
3721 return SDValue();
3722}
3723
3724// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3725// clamp/truncation if necessary.
3726static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3727 SDValue RHS, SelectionDAG &DAG,
3728 const SDLoc &DL) {
3729 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3730 "Illegal truncation");
3731
3732 if (DstVT == SrcVT)
3733 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3734
3735 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3736 // clamping RHS.
3737 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3738 DstVT.getScalarSizeInBits());
3739 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3740 return SDValue();
3741
3742 SDValue SatLimit =
3743 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3744 DstVT.getScalarSizeInBits()),
3745 DL, SrcVT);
3746 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3747 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3748 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3749 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3750}
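// NOTE (illustrative worked example, not from the original source): assuming
// SrcVT = i32, DstVT = i16 and an LHS known to fit in 16 bits, SatLimit is
// 0xFFFF, so RHS is clamped with umin(RHS, 0xFFFF) before truncation. If
// RHS >= LHS the result is 0 either way; if RHS < LHS then RHS < 0xFFFF, the
// clamp is a no-op, and LHS - RHS fits in 16 bits, so the narrow USUBSAT
// matches the wide one.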
3751
3752// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3753// usubsat(a,b), optionally as a truncated type.
3754SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3755 if (N->getOpcode() != ISD::SUB ||
3756 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3757 return SDValue();
3758
3759 EVT SubVT = N->getValueType(0);
3760 SDValue Op0 = N->getOperand(0);
3761 SDValue Op1 = N->getOperand(1);
3762
3763 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3764 // that may be converted to usubsat(a,b).
3765 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3766 SDValue MaxLHS = Op0.getOperand(0);
3767 SDValue MaxRHS = Op0.getOperand(1);
3768 if (MaxLHS == Op1)
3769 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3770 if (MaxRHS == Op1)
3771 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3772 }
3773
3774 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3775 SDValue MinLHS = Op1.getOperand(0);
3776 SDValue MinRHS = Op1.getOperand(1);
3777 if (MinLHS == Op0)
3778 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3779 if (MinRHS == Op0)
3780 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3781 }
3782
3783 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3784 if (Op1.getOpcode() == ISD::TRUNCATE &&
3785 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3786 Op1.getOperand(0).hasOneUse()) {
3787 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3788 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3789 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3790 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3791 DAG, DL);
3792 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3793 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3794 DAG, DL);
3795 }
3796
3797 return SDValue();
3798}
3799
3800// Since it may not be valid to emit a fold to zero for vector initializers
3801// check if we can before folding.
3802static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3803 SelectionDAG &DAG, bool LegalOperations) {
3804 if (!VT.isVector())
3805 return DAG.getConstant(0, DL, VT);
3806 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3807 return DAG.getConstant(0, DL, VT);
3808 return SDValue();
3809}
3810
3811SDValue DAGCombiner::visitSUB(SDNode *N) {
3812 SDValue N0 = N->getOperand(0);
3813 SDValue N1 = N->getOperand(1);
3814 EVT VT = N0.getValueType();
3815 unsigned BitWidth = VT.getScalarSizeInBits();
3816 SDLoc DL(N);
3817
3818 auto PeekThroughFreeze = [](SDValue N) {
3819 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3820 return N->getOperand(0);
3821 return N;
3822 };
3823
3824 // fold (sub x, x) -> 0
3825 // FIXME: Refactor this and xor and other similar operations together.
3826 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3827 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3828
3829 // fold (sub c1, c2) -> c3
3830 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3831 return C;
3832
3833 // fold vector ops
3834 if (VT.isVector()) {
3835 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3836 return FoldedVOp;
3837
3838 // fold (sub x, 0) -> x, vector edition
3839 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3840 return N0;
3841 }
3842
3843 if (SDValue NewSel = foldBinOpIntoSelect(N))
3844 return NewSel;
3845
3846 // fold (sub x, c) -> (add x, -c)
3847 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3848 return DAG.getNode(ISD::ADD, DL, VT, N0,
3849 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3850
3851 if (isNullOrNullSplat(N0)) {
3852 // Right-shifting everything out but the sign bit followed by negation is
3853 // the same as flipping arithmetic/logical shift type without the negation:
3854 // -(X >>u 31) -> (X >>s 31)
3855 // -(X >>s 31) -> (X >>u 31)
3856 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3857 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3858 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3859 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3860 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3861 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3862 }
3863 }
3864
3865 // 0 - X --> 0 if the sub is NUW.
3866 if (N->getFlags().hasNoUnsignedWrap())
3867 return N0;
3868
3869 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3870 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3871 // N1 must be 0 because negating the minimum signed value is undefined.
3872 if (N->getFlags().hasNoSignedWrap())
3873 return N0;
3874
3875 // 0 - X --> X if X is 0 or the minimum signed value.
3876 return N1;
3877 }
3878
3879 // Convert 0 - abs(x).
3880 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3881 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3882 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3883 return Result;
3884
3885 // Fold neg(splat(neg(x)) -> splat(x)
3886 if (VT.isVector()) {
3887 SDValue N1S = DAG.getSplatValue(N1, true);
3888 if (N1S && N1S.getOpcode() == ISD::SUB &&
3889 isNullConstant(N1S.getOperand(0)))
3890 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3891 }
3892 }
3893
3894 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3895 if (isAllOnesOrAllOnesSplat(N0))
3896 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3897
3898 // fold (A - (0-B)) -> A+B
3899 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3900 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3901
3902 // fold A-(A-B) -> B
3903 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3904 return N1.getOperand(1);
3905
3906 // fold (A+B)-A -> B
3907 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3908 return N0.getOperand(1);
3909
3910 // fold (A+B)-B -> A
3911 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3912 return N0.getOperand(0);
3913
3914 // fold (A+C1)-C2 -> A+(C1-C2)
3915 if (N0.getOpcode() == ISD::ADD) {
3916 SDValue N01 = N0.getOperand(1);
3917 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3918 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3919 }
3920
3921 // fold C2-(A+C1) -> (C2-C1)-A
3922 if (N1.getOpcode() == ISD::ADD) {
3923 SDValue N11 = N1.getOperand(1);
3924 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3925 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3926 }
3927
3928 // fold (A-C1)-C2 -> A-(C1+C2)
3929 if (N0.getOpcode() == ISD::SUB) {
3930 SDValue N01 = N0.getOperand(1);
3931 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3932 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3933 }
3934
3935 // fold (c1-A)-c2 -> (c1-c2)-A
3936 if (N0.getOpcode() == ISD::SUB) {
3937 SDValue N00 = N0.getOperand(0);
3938 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3939 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3940 }
3941
3942 SDValue A, B, C;
3943
3944 // fold ((A+(B+C))-B) -> A+C
3945 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3946 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3947
3948 // fold ((A+(B-C))-B) -> A-C
3949 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3950 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3951
3952 // fold ((A-(B-C))-C) -> A-B
3953 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3954 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3955
3956 // fold (A-(B-C)) -> A+(C-B)
3957 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3958 return DAG.getNode(ISD::ADD, DL, VT, N0,
3959 DAG.getNode(ISD::SUB, DL, VT, C, B));
3960
3961 // A - (A & B) -> A & (~B)
3962 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3963 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3964 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3965
3966 // fold (A - (-B * C)) -> (A + (B * C))
3967 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3968 return DAG.getNode(ISD::ADD, DL, VT, N0,
3969 DAG.getNode(ISD::MUL, DL, VT, B, C));
3970
3971 // If either operand of a sub is undef, the result is undef
3972 if (N0.isUndef())
3973 return N0;
3974 if (N1.isUndef())
3975 return N1;
3976
3977 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3978 return V;
3979
3980 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3981 return V;
3982
3983 // Try to match AVGCEIL fixedwidth pattern
3984 if (SDValue V = foldSubToAvg(N, DL))
3985 return V;
3986
3987 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3988 return V;
3989
3990 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3991 return V;
3992
3993 // (A - B) - 1 -> add (xor B, -1), A
3994 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3995 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3996
3997 // Look for:
3998 // sub y, (xor x, -1)
3999 // And if the target does not like this form then turn into:
4000 // add (add x, y), 1
4001 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4002 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4003 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4004 }
4005
4006 // Hoist one-use addition by non-opaque constant:
4007 // (x + C) - y -> (x - y) + C
4008 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4009 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4010 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4011 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4012 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4013 }
4014 // y - (x + C) -> (y - x) - C
4015 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4016 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4017 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4018 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4019 }
4020 // (x - C) - y -> (x - y) - C
4021 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4022 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4023 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4024 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4025 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4026 }
4027 // (C - x) - y -> C - (x + y)
4028 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4029 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4030 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4031 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4032 }
4033
4034 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4035 // rather than 'sub 0/1' (the sext should get folded).
4036 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4037 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4038 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4039 TLI.getBooleanContents(VT) ==
4040 TargetLowering::ZeroOrNegativeOneBooleanContent) {
4041 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4042 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4043 }
4044
4045 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4046 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4047 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4048 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4049 return DAG.getNode(ISD::ABS, DL, VT, A);
4050
4051 // If the relocation model supports it, consider symbol offsets.
4052 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4053 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4054 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4055 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4056 if (GA->getGlobal() == GB->getGlobal())
4057 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4058 DL, VT);
4059 }
4060
4061 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4062 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4063 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4064 if (TN->getVT() == MVT::i1) {
4065 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4066 DAG.getConstant(1, DL, VT));
4067 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4068 }
4069 }
4070
4071 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4072 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4073 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4074 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4075 }
4076
4077 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4078 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4079 APInt NewStep = -N1.getConstantOperandAPInt(0);
4080 return DAG.getNode(ISD::ADD, DL, VT, N0,
4081 DAG.getStepVector(DL, VT, NewStep));
4082 }
4083
4084 // Prefer an add for more folding potential and possibly better codegen:
4085 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4086 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4087 SDValue ShAmt = N1.getOperand(1);
4088 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4089 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4090 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4091 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4092 }
4093 }
4094
4095 // As with the previous fold, prefer add for more folding potential.
4096 // Subtracting SMIN/0 is the same as adding SMIN/0:
4097 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4098 if (N1.getOpcode() == ISD::SHL) {
4099 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4100 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4101 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4102 }
4103
4104 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4105 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4106 N0.getResNo() == 0 && N0.hasOneUse())
4107 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4108 N0.getOperand(0), N1, N0.getOperand(2));
4109
4111 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4112 if (SDValue Carry = getAsCarry(TLI, N0)) {
4113 SDValue X = N1;
4114 SDValue Zero = DAG.getConstant(0, DL, VT);
4115 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4116 return DAG.getNode(ISD::UADDO_CARRY, DL,
4117 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4118 Carry);
4119 }
4120 }
4121
4122 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4123 // sub C0, X --> xor X, C0
4124 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4125 if (!C0->isOpaque()) {
4126 const APInt &C0Val = C0->getAPIntValue();
4127 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4128 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4129 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4130 }
4131 }
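// NOTE (illustrative worked example, not from the original source): for the
// no-borrow check above, take a 4-bit value with C0 = 0b1111 and suppose known
// bits show only the low two bits of X can be one, so MaybeOnes is 0b0011.
// Then C0 - MaybeOnes == 0b1100 == C0 ^ MaybeOnes, no bit position can borrow,
// and for every admissible X the subtraction equals the xor, e.g.
// 15 - 2 == 13 == 15 ^ 2.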
4132
4133 // smax(a,b) - smin(a,b) --> abds(a,b)
4134 if (hasOperation(ISD::ABDS, VT) &&
4135 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4136 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4137 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4138
4139 // umax(a,b) - umin(a,b) --> abdu(a,b)
4140 if (hasOperation(ISD::ABDU, VT) &&
4141 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4142 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4143 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4144
4145 return SDValue();
4146}
4147
4148SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4149 unsigned Opcode = N->getOpcode();
4150 SDValue N0 = N->getOperand(0);
4151 SDValue N1 = N->getOperand(1);
4152 EVT VT = N0.getValueType();
4153 bool IsSigned = Opcode == ISD::SSUBSAT;
4154 SDLoc DL(N);
4155
4156 // fold (sub_sat x, undef) -> 0
4157 if (N0.isUndef() || N1.isUndef())
4158 return DAG.getConstant(0, DL, VT);
4159
4160 // fold (sub_sat x, x) -> 0
4161 if (N0 == N1)
4162 return DAG.getConstant(0, DL, VT);
4163
4164 // fold (sub_sat c1, c2) -> c3
4165 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4166 return C;
4167
4168 // fold vector ops
4169 if (VT.isVector()) {
4170 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4171 return FoldedVOp;
4172
4173 // fold (sub_sat x, 0) -> x, vector edition
4174 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4175 return N0;
4176 }
4177
4178 // fold (sub_sat x, 0) -> x
4179 if (isNullConstant(N1))
4180 return N0;
4181
4182 // If it cannot overflow, transform into a sub.
4183 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4184 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4185
4186 return SDValue();
4187}
4188
4189SDValue DAGCombiner::visitSUBC(SDNode *N) {
4190 SDValue N0 = N->getOperand(0);
4191 SDValue N1 = N->getOperand(1);
4192 EVT VT = N0.getValueType();
4193 SDLoc DL(N);
4194
4195 // If the flag result is dead, turn this into an SUB.
4196 if (!N->hasAnyUseOfValue(1))
4197 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4198 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4199
4200 // fold (subc x, x) -> 0 + no borrow
4201 if (N0 == N1)
4202 return CombineTo(N, DAG.getConstant(0, DL, VT),
4203 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4204
4205 // fold (subc x, 0) -> x + no borrow
4206 if (isNullConstant(N1))
4207 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4208
4209 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4210 if (isAllOnesConstant(N0))
4211 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4212 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4213
4214 return SDValue();
4215}
4216
4217SDValue DAGCombiner::visitSUBO(SDNode *N) {
4218 SDValue N0 = N->getOperand(0);
4219 SDValue N1 = N->getOperand(1);
4220 EVT VT = N0.getValueType();
4221 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4222
4223 EVT CarryVT = N->getValueType(1);
4224 SDLoc DL(N);
4225
4226 // If the flag result is dead, turn this into an SUB.
4227 if (!N->hasAnyUseOfValue(1))
4228 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4229 DAG.getUNDEF(CarryVT));
4230
4231 // fold (subo x, x) -> 0 + no borrow
4232 if (N0 == N1)
4233 return CombineTo(N, DAG.getConstant(0, DL, VT),
4234 DAG.getConstant(0, DL, CarryVT));
4235
4236 // fold (subo x, c) -> (addo x, -c)
4237 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4238 if (IsSigned && !N1C->isMinSignedValue())
4239 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4240 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4241
4242 // fold (subo x, 0) -> x + no borrow
4243 if (isNullOrNullSplat(N1))
4244 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4245
4246 // If it cannot overflow, transform into a sub.
4247 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4248 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4249 DAG.getConstant(0, DL, CarryVT));
4250
4251 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4252 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4253 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4254 DAG.getConstant(0, DL, CarryVT));
4255
4256 return SDValue();
4257}
4258
4259SDValue DAGCombiner::visitSUBE(SDNode *N) {
4260 SDValue N0 = N->getOperand(0);
4261 SDValue N1 = N->getOperand(1);
4262 SDValue CarryIn = N->getOperand(2);
4263
4264 // fold (sube x, y, false) -> (subc x, y)
4265 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4266 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4267
4268 return SDValue();
4269}
4270
4271SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4272 SDValue N0 = N->getOperand(0);
4273 SDValue N1 = N->getOperand(1);
4274 SDValue CarryIn = N->getOperand(2);
4275
4276 // fold (usubo_carry x, y, false) -> (usubo x, y)
4277 if (isNullConstant(CarryIn)) {
4278 if (!LegalOperations ||
4279 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4280 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4281 }
4282
4283 return SDValue();
4284}
4285
4286SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4287 SDValue N0 = N->getOperand(0);
4288 SDValue N1 = N->getOperand(1);
4289 SDValue CarryIn = N->getOperand(2);
4290
4291 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4292 if (isNullConstant(CarryIn)) {
4293 if (!LegalOperations ||
4294 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4295 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4296 }
4297
4298 return SDValue();
4299}
4300
4301// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4302// UMULFIXSAT here.
4303SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4304 SDValue N0 = N->getOperand(0);
4305 SDValue N1 = N->getOperand(1);
4306 SDValue Scale = N->getOperand(2);
4307 EVT VT = N0.getValueType();
4308
4309 // fold (mulfix x, undef, scale) -> 0
4310 if (N0.isUndef() || N1.isUndef())
4311 return DAG.getConstant(0, SDLoc(N), VT);
4312
4313 // Canonicalize constant to RHS (vector doesn't have to splat)
4314 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4315 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4316 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4317
4318 // fold (mulfix x, 0, scale) -> 0
4319 if (isNullConstant(N1))
4320 return DAG.getConstant(0, SDLoc(N), VT);
4321
4322 return SDValue();
4323}
4324
4325template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4326 SDValue N0 = N->getOperand(0);
4327 SDValue N1 = N->getOperand(1);
4328 EVT VT = N0.getValueType();
4329 unsigned BitWidth = VT.getScalarSizeInBits();
4330 SDLoc DL(N);
4331 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4332 MatchContextClass Matcher(DAG, TLI, N);
4333
4334 // fold (mul x, undef) -> 0
4335 if (N0.isUndef() || N1.isUndef())
4336 return DAG.getConstant(0, DL, VT);
4337
4338 // fold (mul c1, c2) -> c1*c2
4339 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4340 return C;
4341
4342 // canonicalize constant to RHS (vector doesn't have to splat)
4343 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4344 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4345 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4346
4347 bool N1IsConst = false;
4348 bool N1IsOpaqueConst = false;
4349 APInt ConstValue1;
4350
4351 // fold vector ops
4352 if (VT.isVector()) {
4353 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4354 if (!UseVP)
4355 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4356 return FoldedVOp;
4357
4358 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4359 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4360 "Splat APInt should be element width");
4361 } else {
4362 N1IsConst = isa<ConstantSDNode>(N1);
4363 if (N1IsConst) {
4364 ConstValue1 = N1->getAsAPIntVal();
4365 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4366 }
4367 }
4368
4369 // fold (mul x, 0) -> 0
4370 if (N1IsConst && ConstValue1.isZero())
4371 return N1;
4372
4373 // fold (mul x, 1) -> x
4374 if (N1IsConst && ConstValue1.isOne())
4375 return N0;
4376
4377 if (!UseVP)
4378 if (SDValue NewSel = foldBinOpIntoSelect(N))
4379 return NewSel;
4380
4381 // fold (mul x, -1) -> 0-x
4382 if (N1IsConst && ConstValue1.isAllOnes())
4383 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4384
4385 // fold (mul x, (1 << c)) -> x << c
4386 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4387 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4388 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4389 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4390 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4391 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4392 }
4393 }
4394
4395 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4396 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4397 unsigned Log2Val = (-ConstValue1).logBase2();
4398 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4399
4400 // FIXME: If the input is something that is easily negated (e.g. a
4401 // single-use add), we should put the negate there.
4402 return Matcher.getNode(
4403 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4404 Matcher.getNode(ISD::SHL, DL, VT, N0,
4405 DAG.getConstant(Log2Val, DL, ShiftVT)));
4406 }
4407
4408 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4409 // hi result is in use in case we hit this mid-legalization.
4410 if (!UseVP) {
4411 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4412 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4413 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4414 // TODO: Can we match commutable operands with getNodeIfExists?
4415 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4416 if (LoHi->hasAnyUseOfValue(1))
4417 return SDValue(LoHi, 0);
4418 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4419 if (LoHi->hasAnyUseOfValue(1))
4420 return SDValue(LoHi, 0);
4421 }
4422 }
4423 }
4424
4425 // Try to transform:
4426 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4427 // mul x, (2^N + 1) --> add (shl x, N), x
4428 // mul x, (2^N - 1) --> sub (shl x, N), x
4429 // Examples: x * 33 --> (x << 5) + x
4430 // x * 15 --> (x << 4) - x
4431 // x * -33 --> -((x << 5) + x)
4432 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4433 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4434 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4435 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4436 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4437 // x * 0xf800 --> (x << 16) - (x << 11)
4438 // x * -0x8800 --> -((x << 15) + (x << 11))
4439 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4440 if (!UseVP && N1IsConst &&
4441 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4442 // TODO: We could handle more general decomposition of any constant by
4443 // having the target set a limit on number of ops and making a
4444 // callback to determine that sequence (similar to sqrt expansion).
4445 unsigned MathOp = ISD::DELETED_NODE;
4446 APInt MulC = ConstValue1.abs();
4447 // The constant `2` should be treated as (2^0 + 1).
4448 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4449 MulC.lshrInPlace(TZeros);
4450 if ((MulC - 1).isPowerOf2())
4451 MathOp = ISD::ADD;
4452 else if ((MulC + 1).isPowerOf2())
4453 MathOp = ISD::SUB;
4454
4455 if (MathOp != ISD::DELETED_NODE) {
4456 unsigned ShAmt =
4457 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4458 ShAmt += TZeros;
4459 assert(ShAmt < BitWidth &&
4460 "multiply-by-constant generated out of bounds shift");
4461 SDValue Shl =
4462 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4463 SDValue R =
4464 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4465 DAG.getNode(ISD::SHL, DL, VT, N0,
4466 DAG.getConstant(TZeros, DL, VT)))
4467 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4468 if (ConstValue1.isNegative())
4469 R = DAG.getNegative(R, DL, VT);
4470 return R;
4471 }
4472 }
4473
4474 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4475 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4476 SDValue N01 = N0.getOperand(1);
4477 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4478 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4479 }
4480
4481 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4482 // use.
4483 {
4484 SDValue Sh, Y;
4485
4486 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4487 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4488 isConstantOrConstantVector(N0.getOperand(1))) {
4489 Sh = N0; Y = N1;
4490 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4491 isConstantOrConstantVector(N1.getOperand(1))) {
4492 Sh = N1; Y = N0;
4493 }
4494
4495 if (Sh.getNode()) {
4496 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4497 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4498 }
4499 }
4500
4501 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4502 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4503 isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4504 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques*/ true) &&
4505 isMulAddWithConstProfitable(N, N0, N1))
4506 return Matcher.getNode(
4507 ISD::ADD, DL, VT,
4508 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4509 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4510
4511 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4512 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4513 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4514 const APInt &C0 = N0.getConstantOperandAPInt(0);
4515 const APInt &C1 = NC1->getAPIntValue();
4516 return DAG.getVScale(DL, VT, C0 * C1);
4517 }
4518
4519 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4520 APInt MulVal;
4521 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4522 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4523 const APInt &C0 = N0.getConstantOperandAPInt(0);
4524 APInt NewStep = C0 * MulVal;
4525 return DAG.getStepVector(DL, VT, NewStep);
4526 }
4527
4528 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4529 SDValue X;
4530 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4531 sd_context_match(
4532 N, Matcher,
4533 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4534 m_Deferred(X)))) {
4535 return Matcher.getNode(ISD::ABS, DL, VT, X);
4536 }
4537
4538 // Fold (mul x, 0/undef) -> 0 and
4539 //      (mul x, 1) -> x
4540 // into and(x, mask).
4541 // We can replace vectors with '0' and '1' factors with a clearing mask.
4542 if (VT.isFixedLengthVector()) {
4543 unsigned NumElts = VT.getVectorNumElements();
4544 SmallBitVector ClearMask;
4545 ClearMask.reserve(NumElts);
4546 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4547 if (!V || V->isZero()) {
4548 ClearMask.push_back(true);
4549 return true;
4550 }
4551 ClearMask.push_back(false);
4552 return V->isOne();
4553 };
4554 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4555 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4556 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4557 EVT LegalSVT = N1.getOperand(0).getValueType();
4558 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4559 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4560 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4561 for (unsigned I = 0; I != NumElts; ++I)
4562 if (ClearMask[I])
4563 Mask[I] = Zero;
4564 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4565 }
4566 }
4567
4568 // reassociate mul
4569 // TODO: Change reassociateOps to support vp ops.
4570 if (!UseVP)
4571 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4572 return RMUL;
4573
4574 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4575 // TODO: Change reassociateReduction to support vp ops.
4576 if (!UseVP)
4577 if (SDValue SD =
4578 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4579 return SD;
4580
4581 // Simplify the operands using demanded-bits information.
4582 if (SimplifyDemandedBits(SDValue(N, 0)))
4583 return SDValue(N, 0);
4584
4585 return SDValue();
4586}
4587
4588/// Return true if divmod libcall is available.
4590 const TargetLowering &TLI) {
4591 RTLIB::Libcall LC;
4592 EVT NodeType = Node->getValueType(0);
4593 if (!NodeType.isSimple())
4594 return false;
4595 switch (NodeType.getSimpleVT().SimpleTy) {
4596 default: return false; // No libcall for vector types.
4597 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4598 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4599 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4600 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4601 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4602 }
4603
4604 return TLI.getLibcallName(LC) != nullptr;
4605}
4606
4607/// Issue divrem if both quotient and remainder are needed.
4608SDValue DAGCombiner::useDivRem(SDNode *Node) {
4609 if (Node->use_empty())
4610 return SDValue(); // This is a dead node, leave it alone.
4611
4612 unsigned Opcode = Node->getOpcode();
4613 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4614 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4615
4616 // DivMod lib calls can still work on non-legal types if using lib-calls.
4617 EVT VT = Node->getValueType(0);
4618 if (VT.isVector() || !VT.isInteger())
4619 return SDValue();
4620
4621 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4622 return SDValue();
4623
4624 // If DIVREM is going to get expanded into a libcall,
4625 // but there is no libcall available, then don't combine.
4626 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4627 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4628 return SDValue();
4629
4630 // If div is legal, it's better to do the normal expansion
4631 unsigned OtherOpcode = 0;
4632 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4633 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4634 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4635 return SDValue();
4636 } else {
4637 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4638 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4639 return SDValue();
4640 }
4641
4642 SDValue Op0 = Node->getOperand(0);
4643 SDValue Op1 = Node->getOperand(1);
4644 SDValue combined;
4645 for (SDNode *User : Op0->uses()) {
4646 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4647 User->use_empty())
4648 continue;
4649 // Convert the other matching node(s), too;
4650 // otherwise, the DIVREM may get target-legalized into something
4651 // target-specific that we won't be able to recognize.
4652 unsigned UserOpc = User->getOpcode();
4653 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4654 User->getOperand(0) == Op0 &&
4655 User->getOperand(1) == Op1) {
4656 if (!combined) {
4657 if (UserOpc == OtherOpcode) {
4658 SDVTList VTs = DAG.getVTList(VT, VT);
4659 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4660 } else if (UserOpc == DivRemOpc) {
4661 combined = SDValue(User, 0);
4662 } else {
4663 assert(UserOpc == Opcode);
4664 continue;
4665 }
4666 }
4667 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4668 CombineTo(User, combined);
4669 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4670 CombineTo(User, combined.getValue(1));
4671 }
4672 }
4673 return combined;
4674}
4675
4676 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4677 SDValue N0 = N->getOperand(0);
4678 SDValue N1 = N->getOperand(1);
4679 EVT VT = N->getValueType(0);
4680 SDLoc DL(N);
4681
4682 unsigned Opc = N->getOpcode();
4683 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4684 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4685
4686 // X / undef -> undef
4687 // X % undef -> undef
4688 // X / 0 -> undef
4689 // X % 0 -> undef
4690 // NOTE: This includes vectors where any divisor element is zero/undef.
4691 if (DAG.isUndef(Opc, {N0, N1}))
4692 return DAG.getUNDEF(VT);
4693
4694 // undef / X -> 0
4695 // undef % X -> 0
4696 if (N0.isUndef())
4697 return DAG.getConstant(0, DL, VT);
4698
4699 // 0 / X -> 0
4700 // 0 % X -> 0
4701 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4702 if (N0C && N0C->isZero())
4703 return N0;
4704
4705 // X / X -> 1
4706 // X % X -> 0
4707 if (N0 == N1)
4708 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4709
4710 // X / 1 -> X
4711 // X % 1 -> 0
4712 // If this is a boolean op (single-bit element type), we can't have
4713 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4714 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4715 // it's a 1.
4716 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4717 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4718
4719 return SDValue();
4720}
4721
4722SDValue DAGCombiner::visitSDIV(SDNode *N) {
4723 SDValue N0 = N->getOperand(0);
4724 SDValue N1 = N->getOperand(1);
4725 EVT VT = N->getValueType(0);
4726 EVT CCVT = getSetCCResultType(VT);
4727 SDLoc DL(N);
4728
4729 // fold (sdiv c1, c2) -> c1/c2
4730 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4731 return C;
4732
4733 // fold vector ops
4734 if (VT.isVector())
4735 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4736 return FoldedVOp;
4737
4738 // fold (sdiv X, -1) -> 0-X
4739 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4740 if (N1C && N1C->isAllOnes())
4741 return DAG.getNegative(N0, DL, VT);
4742
4743 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4744 if (N1C && N1C->isMinSignedValue())
4745 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4746 DAG.getConstant(1, DL, VT),
4747 DAG.getConstant(0, DL, VT));
4748
4749 if (SDValue V = simplifyDivRem(N, DAG))
4750 return V;
4751
4752 if (SDValue NewSel = foldBinOpIntoSelect(N))
4753 return NewSel;
4754
4755 // If we know the sign bits of both operands are zero, strength reduce to a
4756 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4757 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4758 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4759
4760 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4761 // If the corresponding remainder node exists, update its users with
4762 // (Dividend - (Quotient * Divisor).
4763 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4764 { N0, N1 })) {
4765 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4766 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4767 AddToWorklist(Mul.getNode());
4768 AddToWorklist(Sub.getNode());
4769 CombineTo(RemNode, Sub);
4770 }
4771 return V;
4772 }
4773
4774 // sdiv, srem -> sdivrem
4775 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4776 // true. Otherwise, we break the simplification logic in visitREM().
4777  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4778  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4779 if (SDValue DivRem = useDivRem(N))
4780 return DivRem;
4781
4782 return SDValue();
4783}
4784
4785static bool isDivisorPowerOfTwo(SDValue Divisor) {
4786  // Helper for determining whether a value is a power-of-2 constant scalar or a
4787 // vector of such elements.
4788 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4789 if (C->isZero() || C->isOpaque())
4790 return false;
4791 if (C->getAPIntValue().isPowerOf2())
4792 return true;
4793 if (C->getAPIntValue().isNegatedPowerOf2())
4794 return true;
4795 return false;
4796 };
4797
4798 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4799}
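// Editorial sketch (not part of DAGCombiner): a fixed-width scalar analogue of
// the predicate above. APInt handles arbitrary bit widths and opaque constants
// in the real code; this only illustrates the "power of two or negated power
// of two, but never zero" test.
//
//   #include <cstdint>
//
//   static bool isPow2OrNegPow2(int64_t C) {
//     if (C == 0)
//       return false;                   // zero never qualifies as a divisor here
//     uint64_t U = static_cast<uint64_t>(C);
//     uint64_t NegU = ~U + 1;           // two's complement negation, no UB
//     return (U & (U - 1)) == 0 ||      // C is a power of two
//            (NegU & (NegU - 1)) == 0;  // -C is a power of two
//   }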
4800
4801SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4802 SDLoc DL(N);
4803 EVT VT = N->getValueType(0);
4804 EVT CCVT = getSetCCResultType(VT);
4805 unsigned BitWidth = VT.getScalarSizeInBits();
4806
4807 // fold (sdiv X, pow2) -> simple ops after legalize
4808 // FIXME: We check for the exact bit here because the generic lowering gives
4809 // better results in that case. The target-specific lowering should learn how
4810 // to handle exact sdivs efficiently.
4811 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4812 // Target-specific implementation of sdiv x, pow2.
4813 if (SDValue Res = BuildSDIVPow2(N))
4814 return Res;
4815
4816 // Create constants that are functions of the shift amount value.
4817 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4818 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4819 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4820 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4821 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4822 if (!isConstantOrConstantVector(Inexact))
4823 return SDValue();
4824
4825 // Splat the sign bit into the register
4826 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4827 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4828 AddToWorklist(Sign.getNode());
4829
4830 // Add (N0 < 0) ? abs2 - 1 : 0;
4831 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4832 AddToWorklist(Srl.getNode());
4833 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4834 AddToWorklist(Add.getNode());
4835 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4836 AddToWorklist(Sra.getNode());
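    // Editorial worked example (not in the original source): for i32 N0 = -7
    // and N1 = 4 we get C1 = 2 and Inexact = 30, so
    //   Sign = -7 >> 31          = 0xFFFFFFFF  (-1)
    //   Srl  = 0xFFFFFFFF u>> 30 = 3           (the "abs2 - 1" bias)
    //   Add  = -7 + 3            = -4
    //   Sra  = -4 >> 2           = -1          == -7 / 4 rounded toward zero.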
4837
4838 // Special case: (sdiv X, 1) -> X
4839 // Special Case: (sdiv X, -1) -> 0-X
4840 SDValue One = DAG.getConstant(1, DL, VT);
4841    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4842    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4843 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4844 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4845 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4846
4847 // If dividing by a positive value, we're done. Otherwise, the result must
4848 // be negated.
4849 SDValue Zero = DAG.getConstant(0, DL, VT);
4850 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4851
4852 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4853 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4854 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4855 return Res;
4856 }
4857
4858 // If integer divide is expensive and we satisfy the requirements, emit an
4859 // alternate sequence. Targets may check function attributes for size/speed
4860 // trade-offs.
4861  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4862  if (isConstantOrConstantVector(N1) &&
4863      !TLI.isIntDivCheap(N->getValueType(0), Attr))
4864 if (SDValue Op = BuildSDIV(N))
4865 return Op;
4866
4867 return SDValue();
4868}
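// Editorial sketch (not part of DAGCombiner): the bias-and-shift expansion
// above can be cross-checked against ordinary truncating division on a fixed
// 32-bit width. Log2 is at least 1 here; a divisor of 1 is already handled by
// the special-case select in the code above.
//
//   #include <cassert>
//   #include <cstdint>
//
//   int32_t sdivByPow2(int32_t X, unsigned Log2) {
//     int32_t Sign = X >> 31;                          // splat the sign bit (SRA)
//     uint32_t Bias = (uint32_t)Sign >> (32 - Log2);   // (X < 0) ? 2^Log2 - 1 : 0 (SRL)
//     return (X + (int32_t)Bias) >> Log2;              // ADD, then SRA
//   }
//
//   int main() {
//     for (int32_t X = -40000; X <= 40000; ++X)
//       for (unsigned L = 1; L < 8; ++L)
//         assert(sdivByPow2(X, L) == X / (int32_t(1) << L));
//   }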
4869
4870SDValue DAGCombiner::visitUDIV(SDNode *N) {
4871 SDValue N0 = N->getOperand(0);
4872 SDValue N1 = N->getOperand(1);
4873 EVT VT = N->getValueType(0);
4874 EVT CCVT = getSetCCResultType(VT);
4875 SDLoc DL(N);
4876
4877 // fold (udiv c1, c2) -> c1/c2
4878 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4879 return C;
4880
4881 // fold vector ops
4882 if (VT.isVector())
4883 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4884 return FoldedVOp;
4885
4886 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4887  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4888  if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4889 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4890 DAG.getConstant(1, DL, VT),
4891 DAG.getConstant(0, DL, VT));
4892 }
4893
4894 if (SDValue V = simplifyDivRem(N, DAG))
4895 return V;
4896
4897 if (SDValue NewSel = foldBinOpIntoSelect(N))
4898 return NewSel;
4899
4900 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4901 // If the corresponding remainder node exists, update its users with
4902    // (Dividend - (Quotient * Divisor)).
4903 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4904 { N0, N1 })) {
4905 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4906 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4907 AddToWorklist(Mul.getNode());
4908 AddToWorklist(Sub.getNode());
4909 CombineTo(RemNode, Sub);
4910 }
4911 return V;
4912 }
4913
4914  // udiv, urem -> udivrem
4915 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4916 // true. Otherwise, we break the simplification logic in visitREM().
4917  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4918  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4919 if (SDValue DivRem = useDivRem(N))
4920 return DivRem;
4921
4922 return SDValue();
4923}
4924
4925SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4926 SDLoc DL(N);
4927 EVT VT = N->getValueType(0);
4928
4929 // fold (udiv x, (1 << c)) -> x >>u c
4930 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4931 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4932 AddToWorklist(LogBase2.getNode());
4933
4934 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4935 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4936 AddToWorklist(Trunc.getNode());
4937 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4938 }
4939 }
4940
4941 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4942 if (N1.getOpcode() == ISD::SHL) {
4943 SDValue N10 = N1.getOperand(0);
4944 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4945 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4946 AddToWorklist(LogBase2.getNode());
4947
4948 EVT ADDVT = N1.getOperand(1).getValueType();
4949 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4950 AddToWorklist(Trunc.getNode());
4951 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4952 AddToWorklist(Add.getNode());
4953 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4954 }
4955 }
4956 }
4957
4958 // fold (udiv x, c) -> alternate
4959  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4960  if (isConstantOrConstantVector(N1) &&
4961      !TLI.isIntDivCheap(N->getValueType(0), Attr))
4962 if (SDValue Op = BuildUDIV(N))
4963 return Op;
4964
4965 return SDValue();
4966}
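// Editorial sketch (not part of DAGCombiner): both shift folds above rest on
// x / (c << y) == x >> (log2(c) + y) when c is a power of two.
//
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     for (uint32_t X = 0; X < 100000; X += 7)
//       for (unsigned Y = 0; Y < 5; ++Y)           // divisor = 8 << Y
//         assert(X / (8u << Y) == X >> (3 + Y));   // log2(8) == 3
//   }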
4967
4968SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4969 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4970 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4971 // Target-specific implementation of srem x, pow2.
4972 if (SDValue Res = BuildSREMPow2(N))
4973 return Res;
4974 }
4975 return SDValue();
4976}
4977
4978// handles ISD::SREM and ISD::UREM
4979SDValue DAGCombiner::visitREM(SDNode *N) {
4980 unsigned Opcode = N->getOpcode();
4981 SDValue N0 = N->getOperand(0);
4982 SDValue N1 = N->getOperand(1);
4983 EVT VT = N->getValueType(0);
4984 EVT CCVT = getSetCCResultType(VT);
4985
4986 bool isSigned = (Opcode == ISD::SREM);
4987 SDLoc DL(N);
4988
4989 // fold (rem c1, c2) -> c1%c2
4990 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4991 return C;
4992
4993 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4994 // Freeze the numerator to avoid a miscompile with an undefined value.
4995 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4996 CCVT.isVector() == VT.isVector()) {
4997 SDValue F0 = DAG.getFreeze(N0);
4998 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4999 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5000 }
5001
5002 if (SDValue V = simplifyDivRem(N, DAG))
5003 return V;
5004
5005 if (SDValue NewSel = foldBinOpIntoSelect(N))
5006 return NewSel;
5007
5008 if (isSigned) {
5009 // If we know the sign bits of both operands are zero, strength reduce to a
5010 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5011 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5012 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5013 } else {
5014 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5015 // fold (urem x, pow2) -> (and x, pow2-1)
5016 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5017 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5018 AddToWorklist(Add.getNode());
5019 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5020 }
5021 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5022 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5023 // TODO: We should sink the following into isKnownToBePowerOfTwo
5024 // using a OrZero parameter analogous to our handling in ValueTracking.
5025 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5026        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5027      SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5028 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5029 AddToWorklist(Add.getNode());
5030 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5031 }
5032 }
5033
5034  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5035
5036 // If X/C can be simplified by the division-by-constant logic, lower
5037 // X%C to the equivalent of X-X/C*C.
5038 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5039 // speculative DIV must not cause a DIVREM conversion. We guard against this
5040 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5041 // combine will not return a DIVREM. Regardless, checking cheapness here
5042 // makes sense since the simplification results in fatter code.
5043 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5044 if (isSigned) {
5045 // check if we can build faster implementation for srem
5046 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5047 return OptimizedRem;
5048 }
5049
5050 SDValue OptimizedDiv =
5051 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5052 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5053 // If the equivalent Div node also exists, update its users.
5054 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5055 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5056 { N0, N1 }))
5057 CombineTo(DivNode, OptimizedDiv);
5058 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5059 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5060 AddToWorklist(OptimizedDiv.getNode());
5061 AddToWorklist(Mul.getNode());
5062 return Sub;
5063 }
5064 }
5065
5066  // sdiv/udiv, srem/urem -> sdivrem/udivrem
5067 if (SDValue DivRem = useDivRem(N))
5068 return DivRem.getValue(1);
5069
5070 return SDValue();
5071}
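// Editorial sketch (not part of DAGCombiner): the rewrites above follow from
// x % c == x & (c - 1) for a power-of-two c, and in general from
// x % c == x - (x / c) * c.
//
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     for (uint32_t X = 0; X < 50000; X += 11) {
//       assert(X % 64u == (X & 63u));        // urem by pow2 -> and with pow2-1
//       const uint32_t C = 37;
//       assert(X % C == X - (X / C) * C);    // rem = dividend - quotient*divisor
//     }
//   }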
5072
5073SDValue DAGCombiner::visitMULHS(SDNode *N) {
5074 SDValue N0 = N->getOperand(0);
5075 SDValue N1 = N->getOperand(1);
5076 EVT VT = N->getValueType(0);
5077 SDLoc DL(N);
5078
5079 // fold (mulhs c1, c2)
5080 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5081 return C;
5082
5083 // canonicalize constant to RHS.
5084  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5085      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5086    return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5087
5088 if (VT.isVector()) {
5089 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5090 return FoldedVOp;
5091
5092 // fold (mulhs x, 0) -> 0
5093 // do not return N1, because undef node may exist.
5094    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5095      return DAG.getConstant(0, DL, VT);
5096 }
5097
5098 // fold (mulhs x, 0) -> 0
5099 if (isNullConstant(N1))
5100 return N1;
5101
5102 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5103 if (isOneConstant(N1))
5104 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
5105                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
5106                                       getShiftAmountTy(N0.getValueType())));
5107
5108 // fold (mulhs x, undef) -> 0
5109 if (N0.isUndef() || N1.isUndef())
5110 return DAG.getConstant(0, DL, VT);
5111
5112 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5113 // plus a shift.
5114 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5115 !VT.isVector()) {
5116 MVT Simple = VT.getSimpleVT();
5117 unsigned SimpleSize = Simple.getSizeInBits();
5118 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5119 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5120 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5121 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5122 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5123 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5124 DAG.getConstant(SimpleSize, DL,
5125                                      getShiftAmountTy(N1.getValueType())));
5126      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5127 }
5128 }
5129
5130 return SDValue();
5131}
5132
5133SDValue DAGCombiner::visitMULHU(SDNode *N) {
5134 SDValue N0 = N->getOperand(0);
5135 SDValue N1 = N->getOperand(1);
5136 EVT VT = N->getValueType(0);
5137 SDLoc DL(N);
5138
5139 // fold (mulhu c1, c2)
5140 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5141 return C;
5142
5143 // canonicalize constant to RHS.
5144  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5145      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5146    return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5147
5148 if (VT.isVector()) {
5149 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5150 return FoldedVOp;
5151
5152 // fold (mulhu x, 0) -> 0
5153 // do not return N1, because undef node may exist.
5154    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5155      return DAG.getConstant(0, DL, VT);
5156 }
5157
5158 // fold (mulhu x, 0) -> 0
5159 if (isNullConstant(N1))
5160 return N1;
5161
5162 // fold (mulhu x, 1) -> 0
5163 if (isOneConstant(N1))
5164 return DAG.getConstant(0, DL, N0.getValueType());
5165
5166 // fold (mulhu x, undef) -> 0
5167 if (N0.isUndef() || N1.isUndef())
5168 return DAG.getConstant(0, DL, VT);
5169
5170 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5171 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5172 hasOperation(ISD::SRL, VT)) {
5173 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5174 unsigned NumEltBits = VT.getScalarSizeInBits();
5175 SDValue SRLAmt = DAG.getNode(
5176 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5177 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5178 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5179 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5180 }
5181 }
5182
5183 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5184 // plus a shift.
5185 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5186 !VT.isVector()) {
5187 MVT Simple = VT.getSimpleVT();
5188 unsigned SimpleSize = Simple.getSizeInBits();
5189 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5190 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5191 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5192 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5193 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5194 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5195 DAG.getConstant(SimpleSize, DL,
5196                                      getShiftAmountTy(N1.getValueType())));
5197      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5198 }
5199 }
5200
5201 // Simplify the operands using demanded-bits information.
5202 // We don't have demanded bits support for MULHU so this just enables constant
5203 // folding based on known bits.
5204  if (SimplifyDemandedBits(SDValue(N, 0)))
5205    return SDValue(N, 0);
5206
5207 return SDValue();
5208}
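// Editorial sketch (not part of DAGCombiner): the widening fallbacks in
// visitMULHS/visitMULHU above compute the high half of the product by
// extending into a type twice as wide, multiplying, and shifting down.
//
//   #include <cassert>
//   #include <cstdint>
//
//   int32_t mulhs32(int32_t A, int32_t B) {
//     return (int32_t)(((int64_t)A * B) >> 32);      // sign-extend, MUL, shift
//   }
//   uint32_t mulhu32(uint32_t A, uint32_t B) {
//     return (uint32_t)(((uint64_t)A * B) >> 32);    // zero-extend, MUL, shift
//   }
//
//   int main() {
//     assert(mulhs32(-1, 1) == -1);                  // high half of -1 * 1
//     assert(mulhu32(0xFFFFFFFFu, 2) == 1);          // 0x1FFFFFFFE >> 32
//     assert(mulhs32(1 << 30, 8) == 2);              // 2^33 >> 32
//   }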
5209
5210SDValue DAGCombiner::visitAVG(SDNode *N) {
5211 unsigned Opcode = N->getOpcode();
5212 SDValue N0 = N->getOperand(0);
5213 SDValue N1 = N->getOperand(1);
5214 EVT VT = N->getValueType(0);
5215 SDLoc DL(N);
5216 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5217
5218 // fold (avg c1, c2)
5219 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5220 return C;
5221
5222 // canonicalize constant to RHS.
5223  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5224      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5225    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5226
5227 if (VT.isVector())
5228 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5229 return FoldedVOp;
5230
5231 // fold (avg x, undef) -> x
5232 if (N0.isUndef())
5233 return N1;
5234 if (N1.isUndef())
5235 return N0;
5236
5237 // fold (avg x, x) --> x
5238 if (N0 == N1 && Level >= AfterLegalizeTypes)
5239 return N0;
5240
5241 // fold (avgfloor x, 0) -> x >> 1
5242 SDValue X, Y;
5243  if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5244    return DAG.getNode(ISD::SRA, DL, VT, X,
5245 DAG.getShiftAmountConstant(1, VT, DL));
5246  if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5247    return DAG.getNode(ISD::SRL, DL, VT, X,
5248 DAG.getShiftAmountConstant(1, VT, DL));
5249
5250 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5251 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5252 if (!IsSigned &&
5253 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5254 X.getValueType() == Y.getValueType() &&
5255 hasOperation(Opcode, X.getValueType())) {
5256 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5257 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5258 }
5259 if (IsSigned &&
5260 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5261 X.getValueType() == Y.getValueType() &&
5262 hasOperation(Opcode, X.getValueType())) {
5263 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5264 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5265 }
5266
5267 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5268 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5269 // Check if avgflooru isn't legal/custom but avgceilu is.
5270 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5271 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5272 if (DAG.isKnownNeverZero(N1))
5273 return DAG.getNode(
5274 ISD::AVGCEILU, DL, VT, N0,
5275 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5276 if (DAG.isKnownNeverZero(N0))
5277 return DAG.getNode(
5278 ISD::AVGCEILU, DL, VT, N1,
5279 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5280 }
5281
5282 return SDValue();
5283}
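// Editorial sketch (not part of DAGCombiner): the AVGFLOORU -> AVGCEILU
// rewrite above uses floor((x + y) / 2) == ceil((x + (y - 1)) / 2) whenever
// y != 0, so decrementing one known-nonzero operand lets a target that only
// has the ceiling form provide the floor form.
//
//   #include <cassert>
//
//   static unsigned avgflooru(unsigned X, unsigned Y) { return (X + Y) / 2; }
//   static unsigned avgceilu(unsigned X, unsigned Y) { return (X + Y + 1) / 2; }
//
//   int main() {
//     for (unsigned X = 0; X < 256; ++X)
//       for (unsigned Y = 1; Y < 256; ++Y)            // the fold needs Y != 0
//         assert(avgflooru(X, Y) == avgceilu(X, Y - 1));
//   }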
5284
5285SDValue DAGCombiner::visitABD(SDNode *N) {
5286 unsigned Opcode = N->getOpcode();
5287 SDValue N0 = N->getOperand(0);
5288 SDValue N1 = N->getOperand(1);
5289 EVT VT = N->getValueType(0);
5290 SDLoc DL(N);
5291
5292 // fold (abd c1, c2)
5293 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5294 return C;
5295
5296 // canonicalize constant to RHS.
5297  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5298      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5299    return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5300
5301 if (VT.isVector())
5302 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5303 return FoldedVOp;
5304
5305 // fold (abd x, undef) -> 0
5306 if (N0.isUndef() || N1.isUndef())
5307 return DAG.getConstant(0, DL, VT);
5308
5309 SDValue X;
5310
5311 // fold (abds x, 0) -> abs x
5312  if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5313      (!LegalOperations || hasOperation(ISD::ABS, VT)))
5314 return DAG.getNode(ISD::ABS, DL, VT, X);
5315
5316 // fold (abdu x, 0) -> x
5317  if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5318    return X;
5319
5320 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5321 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5322 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5323 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5324
5325 return SDValue();
5326}
5327
5328/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5329/// give the opcodes for the two computations that are being performed. Return
5330/// the combined result if a simplification was made.
5331SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5332 unsigned HiOp) {
5333 // If the high half is not needed, just compute the low half.
5334 bool HiExists = N->hasAnyUseOfValue(1);
5335 if (!HiExists && (!LegalOperations ||
5336 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5337 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5338 return CombineTo(N, Res, Res);
5339 }
5340
5341 // If the low half is not needed, just compute the high half.
5342 bool LoExists = N->hasAnyUseOfValue(0);
5343 if (!LoExists && (!LegalOperations ||
5344 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5345 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5346 return CombineTo(N, Res, Res);
5347 }
5348
5349 // If both halves are used, return as it is.
5350 if (LoExists && HiExists)
5351 return SDValue();
5352
5353 // If the two computed results can be simplified separately, separate them.
5354 if (LoExists) {
5355 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5356 AddToWorklist(Lo.getNode());
5357 SDValue LoOpt = combine(Lo.getNode());
5358 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5359 (!LegalOperations ||
5360 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5361 return CombineTo(N, LoOpt, LoOpt);
5362 }
5363
5364 if (HiExists) {
5365 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5366 AddToWorklist(Hi.getNode());
5367 SDValue HiOpt = combine(Hi.getNode());
5368 if (HiOpt.getNode() && HiOpt != Hi &&
5369 (!LegalOperations ||
5370 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5371 return CombineTo(N, HiOpt, HiOpt);
5372 }
5373
5374 return SDValue();
5375}
5376
5377SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5378 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5379 return Res;
5380
5381 SDValue N0 = N->getOperand(0);
5382 SDValue N1 = N->getOperand(1);
5383 EVT VT = N->getValueType(0);
5384 SDLoc DL(N);
5385
5386 // Constant fold.
5387 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5388 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5389
5390 // canonicalize constant to RHS (vector doesn't have to splat)
5391  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5392      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5393    return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5394
5395  // If the type twice as wide is legal, transform this into a wider
5396 // multiply plus a shift.
5397 if (VT.isSimple() && !VT.isVector()) {
5398 MVT Simple = VT.getSimpleVT();
5399 unsigned SimpleSize = Simple.getSizeInBits();
5400 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5401 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5402 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5403 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5404 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5405 // Compute the high part as N1.
5406 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5407 DAG.getConstant(SimpleSize, DL,
5408 getShiftAmountTy(Lo.getValueType())));
5409 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5410 // Compute the low part as N0.
5411 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5412 return CombineTo(N, Lo, Hi);
5413 }
5414 }
5415
5416 return SDValue();
5417}
5418
5419SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5420 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5421 return Res;
5422
5423 SDValue N0 = N->getOperand(0);
5424 SDValue N1 = N->getOperand(1);
5425 EVT VT = N->getValueType(0);
5426 SDLoc DL(N);
5427
5428 // Constant fold.
5429 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5430 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5431
5432 // canonicalize constant to RHS (vector doesn't have to splat)
5433  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5434      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5435    return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5436
5437 // (umul_lohi N0, 0) -> (0, 0)
5438 if (isNullConstant(N1)) {
5439 SDValue Zero = DAG.getConstant(0, DL, VT);
5440 return CombineTo(N, Zero, Zero);
5441 }
5442
5443 // (umul_lohi N0, 1) -> (N0, 0)
5444 if (isOneConstant(N1)) {
5445 SDValue Zero = DAG.getConstant(0, DL, VT);
5446 return CombineTo(N, N0, Zero);
5447 }
5448
5449  // If the type twice as wide is legal, transform this into a wider
5450 // multiply plus a shift.
5451 if (VT.isSimple() && !VT.isVector()) {
5452 MVT Simple = VT.getSimpleVT();
5453 unsigned SimpleSize = Simple.getSizeInBits();
5454 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5455 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5456 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5457 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5458 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5459 // Compute the high part as N1.
5460 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5461 DAG.getConstant(SimpleSize, DL,
5462 getShiftAmountTy(Lo.getValueType())));
5463 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5464 // Compute the low part as N0.
5465 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5466 return CombineTo(N, Lo, Hi);
5467 }
5468 }
5469
5470 return SDValue();
5471}
5472
5473SDValue DAGCombiner::visitMULO(SDNode *N) {
5474 SDValue N0 = N->getOperand(0);
5475 SDValue N1 = N->getOperand(1);
5476 EVT VT = N0.getValueType();
5477 bool IsSigned = (ISD::SMULO == N->getOpcode());
5478
5479 EVT CarryVT = N->getValueType(1);
5480 SDLoc DL(N);
5481
5482  ConstantSDNode *N0C = isConstOrConstSplat(N0);
5483  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5484
5485 // fold operation with constant operands.
5486 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5487 // multiple results.
5488 if (N0C && N1C) {
5489 bool Overflow;
5490 APInt Result =
5491 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5492 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5493 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5494 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5495 }
5496
5497 // canonicalize constant to RHS.
5498  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5499      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5500    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5501
5502 // fold (mulo x, 0) -> 0 + no carry out
5503 if (isNullOrNullSplat(N1))
5504 return CombineTo(N, DAG.getConstant(0, DL, VT),
5505 DAG.getConstant(0, DL, CarryVT));
5506
5507 // (mulo x, 2) -> (addo x, x)
5508 // FIXME: This needs a freeze.
5509 if (N1C && N1C->getAPIntValue() == 2 &&
5510 (!IsSigned || VT.getScalarSizeInBits() > 2))
5511 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5512 N->getVTList(), N0, N0);
5513
5514 // A 1 bit SMULO overflows if both inputs are 1.
5515 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5516 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5517 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5518 DAG.getConstant(0, DL, VT), ISD::SETNE);
5519 return CombineTo(N, And, Cmp);
5520 }
5521
5522 // If it cannot overflow, transform into a mul.
5523 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5524 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5525 DAG.getConstant(0, DL, CarryVT));
5526 return SDValue();
5527}
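// Editorial sketch (not part of DAGCombiner): for a fixed 32-bit width the
// overflow bit computed by APInt::umul_ov / smul_ov above matches a widening
// multiply followed by a range check.
//
//   #include <cassert>
//   #include <cstdint>
//
//   bool umulo32(uint32_t A, uint32_t B, uint32_t &R) {
//     uint64_t Wide = (uint64_t)A * B;
//     R = (uint32_t)Wide;
//     return Wide > UINT32_MAX;            // overflow iff the high half is nonzero
//   }
//   bool smulo32(int32_t A, int32_t B, int32_t &R) {
//     int64_t Wide = (int64_t)A * B;
//     R = (int32_t)Wide;
//     return Wide != (int64_t)R;           // overflow iff it does not round-trip
//   }
//
//   int main() {
//     uint32_t U; int32_t S;
//     assert(!umulo32(65535u, 65537u, U) && U == 0xFFFFFFFFu);
//     assert(umulo32(65536u, 65536u, U)); // 2^32 does not fit in 32 bits
//     assert(smulo32(-46341, 46341, S));  // just below -2^31: overflows
//     assert(!smulo32(-46340, 46340, S));
//   }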
5528
5529// Function to calculate whether the Min/Max pair of SDNodes (potentially
5530// swapped around) make a signed saturate pattern, clamping to between a signed
5531// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
5532// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5533// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5534// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5535static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5536                                  SDValue N3, ISD::CondCode CC, unsigned &BW,
5537 bool &Unsigned, SelectionDAG &DAG) {
5538 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5539 ISD::CondCode CC) {
5540 // The compare and select operand should be the same or the select operands
5541 // should be truncated versions of the comparison.
5542 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5543 return 0;
5544 // The constants need to be the same or a truncated version of each other.
5545    ConstantSDNode *N1C = isConstOrConstSplat(N1);
5546    ConstantSDNode *N3C = isConstOrConstSplat(N3);
5547    if (!N1C || !N3C)
5548 return 0;
5549 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5550 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5551 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5552 return 0;
5553 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5554 };
5555
5556 // Check the initial value is a SMIN/SMAX equivalent.
5557 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5558 if (!Opcode0)
5559 return SDValue();
5560
5561 // We could only need one range check, if the fptosi could never produce
5562 // the upper value.
5563 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5564 if (isNullOrNullSplat(N3)) {
5565 EVT IntVT = N0.getValueType().getScalarType();
5566 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5567 if (FPVT.isSimple()) {
5568 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5569 const fltSemantics &Semantics = InputTy->getFltSemantics();
5570 uint32_t MinBitWidth =
5571 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5572 if (IntVT.getSizeInBits() >= MinBitWidth) {
5573 Unsigned = true;
5574 BW = PowerOf2Ceil(MinBitWidth);
5575 return N0;
5576 }
5577 }
5578 }
5579 }
5580
5581 SDValue N00, N01, N02, N03;
5582 ISD::CondCode N0CC;
5583 switch (N0.getOpcode()) {
5584 case ISD::SMIN:
5585 case ISD::SMAX:
5586 N00 = N02 = N0.getOperand(0);
5587 N01 = N03 = N0.getOperand(1);
5588 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5589 break;
5590 case ISD::SELECT_CC:
5591 N00 = N0.getOperand(0);
5592 N01 = N0.getOperand(1);
5593 N02 = N0.getOperand(2);
5594 N03 = N0.getOperand(3);
5595 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5596 break;
5597 case ISD::SELECT:
5598 case ISD::VSELECT:
5599 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5600 return SDValue();
5601 N00 = N0.getOperand(0).getOperand(0);
5602 N01 = N0.getOperand(0).getOperand(1);
5603 N02 = N0.getOperand(1);
5604 N03 = N0.getOperand(2);
5605 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5606 break;
5607 default:
5608 return SDValue();
5609 }
5610
5611 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5612 if (!Opcode1 || Opcode0 == Opcode1)
5613 return SDValue();
5614
5615 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5616 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5617 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5618 return SDValue();
5619
5620 const APInt &MinC = MinCOp->getAPIntValue();
5621 const APInt &MaxC = MaxCOp->getAPIntValue();
5622 APInt MinCPlus1 = MinC + 1;
5623 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5624 BW = MinCPlus1.exactLogBase2() + 1;
5625 Unsigned = false;
5626 return N02;
5627 }
5628
5629 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5630 BW = MinCPlus1.exactLogBase2();
5631 Unsigned = true;
5632 return N02;
5633 }
5634
5635 return SDValue();
5636}
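// Editorial worked example for isSaturatingMinMax above (not in the original
// source): the classic i8 clamp smin(smax(x, -128), 127) reaches the signed
// branch with MinC = 127 and MaxC = -128, so MinC + 1 = 128 = -MaxC is a power
// of two and BW = 8. The unsigned clamp smin(smax(x, 0), 255) reaches the
// second branch with MinC = 255 and MaxC = 0, giving BW = 8 and Unsigned = true.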
5637
5638static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5639                                           SDValue N3, ISD::CondCode CC,
5640                                           SelectionDAG &DAG) {
5641 unsigned BW;
5642 bool Unsigned;
5643 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5644 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5645 return SDValue();
5646 EVT FPVT = Fp.getOperand(0).getValueType();
5647 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5648 if (FPVT.isVector())
5649 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5650 FPVT.getVectorElementCount());
5651 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5652 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5653 return SDValue();
5654 SDLoc DL(Fp);
5655 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5656 DAG.getValueType(NewVT.getScalarType()));
5657 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5658}
5659
5660static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5661                                         SDValue N3, ISD::CondCode CC,
5662                                         SelectionDAG &DAG) {
5663 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5664 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5665 // be truncated versions of the setcc (N0/N1).
5666 if ((N0 != N2 &&
5667 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5668      N0.getOpcode() != ISD::FP_TO_UINT)
5669    return SDValue();
5670  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5671  ConstantSDNode *N3C = isConstOrConstSplat(N3);
5672  if (!N1C || !N3C)
5673 return SDValue();
5674 const APInt &C1 = N1C->getAPIntValue();
5675 const APInt &C3 = N3C->getAPIntValue();
5676 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5677 C1 != C3.zext(C1.getBitWidth()))
5678 return SDValue();
5679
5680 unsigned BW = (C1 + 1).exactLogBase2();
5681 EVT FPVT = N0.getOperand(0).getValueType();
5682 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5683 if (FPVT.isVector())
5684 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5685 FPVT.getVectorElementCount());
5686  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5687                                                        FPVT, NewVT))
5688 return SDValue();
5689
5690 SDValue Sat =
5691 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5692 DAG.getValueType(NewVT.getScalarType()));
5693 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5694}
5695
5696SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5697 SDValue N0 = N->getOperand(0);
5698 SDValue N1 = N->getOperand(1);
5699 EVT VT = N0.getValueType();
5700 unsigned Opcode = N->getOpcode();
5701 SDLoc DL(N);
5702
5703 // fold operation with constant operands.
5704 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5705 return C;
5706
5707 // If the operands are the same, this is a no-op.
5708 if (N0 == N1)
5709 return N0;
5710
5711 // canonicalize constant to RHS
5712  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5713      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5714    return DAG.getNode(Opcode, DL, VT, N1, N0);
5715
5716 // fold vector ops
5717 if (VT.isVector())
5718 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5719 return FoldedVOp;
5720
5721 // reassociate minmax
5722 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5723 return RMINMAX;
5724
5725  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5726 // Only do this if:
5727 // 1. The current op isn't legal and the flipped is.
5728 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5729 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5730 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5731 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5732 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5733 unsigned AltOpcode;
5734 switch (Opcode) {
5735 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5736 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5737 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5738 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5739 default: llvm_unreachable("Unknown MINMAX opcode");
5740 }
5741 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5742 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5743 }
5744
5745 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5746    if (SDValue S = PerformMinMaxFpToSatCombine(
5747            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5748 return S;
5749 if (Opcode == ISD::UMIN)
5750 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5751 return S;
5752
5753 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5754 auto ReductionOpcode = [](unsigned Opcode) {
5755 switch (Opcode) {
5756 case ISD::SMIN:
5757 return ISD::VECREDUCE_SMIN;
5758 case ISD::SMAX:
5759 return ISD::VECREDUCE_SMAX;
5760 case ISD::UMIN:
5761 return ISD::VECREDUCE_UMIN;
5762 case ISD::UMAX:
5763 return ISD::VECREDUCE_UMAX;
5764 default:
5765 llvm_unreachable("Unexpected opcode");
5766 }
5767 };
5768 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5769 SDLoc(N), VT, N0, N1))
5770 return SD;
5771
5772 // Simplify the operands using demanded-bits information.
5773  if (SimplifyDemandedBits(SDValue(N, 0)))
5774    return SDValue(N, 0);
5775
5776 return SDValue();
5777}
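// Editorial sketch (not part of DAGCombiner): the UMIN/SMIN (and UMAX/SMAX)
// flip above is sound because signed and unsigned orderings agree whenever
// both operands have a clear sign bit.
//
//   #include <algorithm>
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     for (int A = 0; A <= 127; ++A)        // i8 values with the sign bit clear
//       for (int B = 0; B <= 127; ++B) {
//         assert(std::min<int8_t>(A, B) == (int8_t)std::min<uint8_t>(A, B));
//         assert(std::max<int8_t>(A, B) == (int8_t)std::max<uint8_t>(A, B));
//       }
//   }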
5778
5779/// If this is a bitwise logic instruction and both operands have the same
5780/// opcode, try to sink the other opcode after the logic instruction.
5781SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5782 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5783 EVT VT = N0.getValueType();
5784 unsigned LogicOpcode = N->getOpcode();
5785 unsigned HandOpcode = N0.getOpcode();
5786 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5787 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5788
5789 // Bail early if none of these transforms apply.
5790 if (N0.getNumOperands() == 0)
5791 return SDValue();
5792
5793 // FIXME: We should check number of uses of the operands to not increase
5794 // the instruction count for all transforms.
5795
5796 // Handle size-changing casts (or sign_extend_inreg).
5797 SDValue X = N0.getOperand(0);
5798 SDValue Y = N1.getOperand(0);
5799 EVT XVT = X.getValueType();
5800 SDLoc DL(N);
5801 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5802 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5803 N0.getOperand(1) == N1.getOperand(1))) {
5804 // If both operands have other uses, this transform would create extra
5805 // instructions without eliminating anything.
5806 if (!N0.hasOneUse() && !N1.hasOneUse())
5807 return SDValue();
5808 // We need matching integer source types.
5809 if (XVT != Y.getValueType())
5810 return SDValue();
5811 // Don't create an illegal op during or after legalization. Don't ever
5812 // create an unsupported vector op.
5813 if ((VT.isVector() || LegalOperations) &&
5814 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5815 return SDValue();
5816 // Avoid infinite looping with PromoteIntBinOp.
5817 // TODO: Should we apply desirable/legal constraints to all opcodes?
5818 if ((HandOpcode == ISD::ANY_EXTEND ||
5819 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5820 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5821 return SDValue();
5822 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5823 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5824 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5825 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5826 return DAG.getNode(HandOpcode, DL, VT, Logic);
5827 }
5828
5829 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5830 if (HandOpcode == ISD::TRUNCATE) {
5831 // If both operands have other uses, this transform would create extra
5832 // instructions without eliminating anything.
5833 if (!N0.hasOneUse() && !N1.hasOneUse())
5834 return SDValue();
5835 // We need matching source types.
5836 if (XVT != Y.getValueType())
5837 return SDValue();
5838 // Don't create an illegal op during or after legalization.
5839 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5840 return SDValue();
5841 // Be extra careful sinking truncate. If it's free, there's no benefit in
5842 // widening a binop. Also, don't create a logic op on an illegal type.
5843 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5844 return SDValue();
5845 if (!TLI.isTypeLegal(XVT))
5846 return SDValue();
5847 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5848 return DAG.getNode(HandOpcode, DL, VT, Logic);
5849 }
5850
5851 // For binops SHL/SRL/SRA/AND:
5852 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5853 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5854 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5855 N0.getOperand(1) == N1.getOperand(1)) {
5856 // If either operand has other uses, this transform is not an improvement.
5857 if (!N0.hasOneUse() || !N1.hasOneUse())
5858 return SDValue();
5859 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5860 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5861 }
5862
5863 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5864 if (HandOpcode == ISD::BSWAP) {
5865 // If either operand has other uses, this transform is not an improvement.
5866 if (!N0.hasOneUse() || !N1.hasOneUse())
5867 return SDValue();
5868 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5869 return DAG.getNode(HandOpcode, DL, VT, Logic);
5870 }
5871
5872 // For funnel shifts FSHL/FSHR:
5873 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5874 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5875 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5876 N0.getOperand(2) == N1.getOperand(2)) {
5877 if (!N0.hasOneUse() || !N1.hasOneUse())
5878 return SDValue();
5879 SDValue X1 = N0.getOperand(1);
5880 SDValue Y1 = N1.getOperand(1);
5881 SDValue S = N0.getOperand(2);
5882 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5883 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5884 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5885 }
5886
5887 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5888 // Only perform this optimization up until type legalization, before
5889  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5890 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5891 // we don't want to undo this promotion.
5892 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5893 // on scalars.
5894 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5895 Level <= AfterLegalizeTypes) {
5896 // Input types must be integer and the same.
5897 if (XVT.isInteger() && XVT == Y.getValueType() &&
5898 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5899 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5900 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5901 return DAG.getNode(HandOpcode, DL, VT, Logic);
5902 }
5903 }
5904
5905 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5906 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5907 // If both shuffles use the same mask, and both shuffle within a single
5908 // vector, then it is worthwhile to move the swizzle after the operation.
5909 // The type-legalizer generates this pattern when loading illegal
5910 // vector types from memory. In many cases this allows additional shuffle
5911 // optimizations.
5912 // There are other cases where moving the shuffle after the xor/and/or
5913 // is profitable even if shuffles don't perform a swizzle.
5914 // If both shuffles use the same mask, and both shuffles have the same first
5915 // or second operand, then it might still be profitable to move the shuffle
5916 // after the xor/and/or operation.
5917 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5918 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5919 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5920 assert(X.getValueType() == Y.getValueType() &&
5921 "Inputs to shuffles are not the same type");
5922
5923 // Check that both shuffles use the same mask. The masks are known to be of
5924 // the same length because the result vector type is the same.
5925 // Check also that shuffles have only one use to avoid introducing extra
5926 // instructions.
5927 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5928 !SVN0->getMask().equals(SVN1->getMask()))
5929 return SDValue();
5930
5931 // Don't try to fold this node if it requires introducing a
5932 // build vector of all zeros that might be illegal at this stage.
5933 SDValue ShOp = N0.getOperand(1);
5934 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5935 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5936
5937 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5938 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5939 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5940 N0.getOperand(0), N1.getOperand(0));
5941 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5942 }
5943
5944 // Don't try to fold this node if it requires introducing a
5945 // build vector of all zeros that might be illegal at this stage.
5946 ShOp = N0.getOperand(0);
5947 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5948 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5949
5950 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5951 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5952 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5953 N1.getOperand(1));
5954 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5955 }
5956 }
5957
5958 return SDValue();
5959}
5960
5961/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5962SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5963 const SDLoc &DL) {
5964 SDValue LL, LR, RL, RR, N0CC, N1CC;
5965 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5966 !isSetCCEquivalent(N1, RL, RR, N1CC))
5967 return SDValue();
5968
5969 assert(N0.getValueType() == N1.getValueType() &&
5970 "Unexpected operand types for bitwise logic op");
5971 assert(LL.getValueType() == LR.getValueType() &&
5972 RL.getValueType() == RR.getValueType() &&
5973 "Unexpected operand types for setcc");
5974
5975 // If we're here post-legalization or the logic op type is not i1, the logic
5976 // op type must match a setcc result type. Also, all folds require new
5977 // operations on the left and right operands, so those types must match.
5978 EVT VT = N0.getValueType();
5979 EVT OpVT = LL.getValueType();
5980 if (LegalOperations || VT.getScalarType() != MVT::i1)
5981 if (VT != getSetCCResultType(OpVT))
5982 return SDValue();
5983 if (OpVT != RL.getValueType())
5984 return SDValue();
5985
5986 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5987 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5988 bool IsInteger = OpVT.isInteger();
5989 if (LR == RR && CC0 == CC1 && IsInteger) {
5990 bool IsZero = isNullOrNullSplat(LR);
5991 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5992
5993 // All bits clear?
5994 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5995 // All sign bits clear?
5996 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5997 // Any bits set?
5998 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5999 // Any sign bits set?
6000 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
6001
6002 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
6003 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
6004 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
6005 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
6006 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
6007 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
6008 AddToWorklist(Or.getNode());
6009 return DAG.getSetCC(DL, VT, Or, LR, CC1);
6010 }
6011
6012 // All bits set?
6013 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
6014 // All sign bits set?
6015 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
6016 // Any bits clear?
6017 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
6018 // Any sign bits clear?
6019 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
6020
6021 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
6022 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
6023 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
6024 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
6025 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6026 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6027 AddToWorklist(And.getNode());
6028 return DAG.getSetCC(DL, VT, And, LR, CC1);
6029 }
6030 }
6031
6032 // TODO: What is the 'or' equivalent of this fold?
6033 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6034 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6035 IsInteger && CC0 == ISD::SETNE &&
6036 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6037 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6038 SDValue One = DAG.getConstant(1, DL, OpVT);
6039 SDValue Two = DAG.getConstant(2, DL, OpVT);
6040 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6041 AddToWorklist(Add.getNode());
6042 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6043 }
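  // Editorial note (not in the original source): adding 1 wraps X == -1 to 0
  // and maps X == 0 to 1, while every other value lands at 2 or above, so the
  // single unsigned "uge 2" test is exactly "X != 0 && X != -1".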
6044
6045 // Try more general transforms if the predicates match and the only user of
6046 // the compares is the 'and' or 'or'.
6047 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6048 N0.hasOneUse() && N1.hasOneUse()) {
6049 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6050 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6051 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6052 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6053 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6054 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6055 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6056 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6057 }
6058
6059 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6060 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6061 // Match a shared variable operand and 2 non-opaque constant operands.
6062 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6063 // The difference of the constants must be a single bit.
6064 const APInt &CMax =
6065 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6066 const APInt &CMin =
6067 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6068 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6069 };
6070 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6071 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6072 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6073 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6074 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6075 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6076 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6077 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6078 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6079 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6080 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6081 }
6082 }
6083 }
6084
6085 // Canonicalize equivalent operands to LL == RL.
6086 if (LL == RR && LR == RL) {
6087    CC1 = ISD::getSetCCSwappedOperands(CC1);
6088    std::swap(RL, RR);
6089 }
6090
6091 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6092 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6093 if (LL == RL && LR == RR) {
6094 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6095 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6096 if (NewCC != ISD::SETCC_INVALID &&
6097 (!LegalOperations ||
6098 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6099 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6100 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6101 }
6102
6103 return SDValue();
6104}
6105
6106static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6107 SelectionDAG &DAG) {
6108 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6109}
6110
6111static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6112 SelectionDAG &DAG) {
6113 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6114}
6115
6116static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6117 ISD::CondCode CC, unsigned OrAndOpcode,
6118 SelectionDAG &DAG,
6119 bool isFMAXNUMFMINNUM_IEEE,
6120 bool isFMAXNUMFMINNUM) {
6121 // The optimization cannot be applied for all the predicates because
6122 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6123 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6124 // applied at all if one of the operands is a signaling NaN.
6125
6126 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6127 // are non NaN values.
6128 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6129 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6130 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6131                   isFMAXNUMFMINNUM_IEEE
6132               ? ISD::FMINNUM_IEEE
6133               : ISD::DELETED_NODE;
6134 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6135 (OrAndOpcode == ISD::OR)) ||
6136 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6137 (OrAndOpcode == ISD::AND)))
6138 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6139                   isFMAXNUMFMINNUM_IEEE
6140               ? ISD::FMAXNUM_IEEE
6141               : ISD::DELETED_NODE;
6142 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6143 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6144 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6145 // that there are not any sNaNs, then the optimization is not valid
6146 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6147 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6148 // we can prove that we do not have any sNaNs, then we can do the
6149 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6150 // cases.
6151 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6152 (OrAndOpcode == ISD::OR)) ||
6153 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6154 (OrAndOpcode == ISD::AND)))
6155 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6156 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6157                                      isFMAXNUMFMINNUM_IEEE
6158                                  ? ISD::FMINNUM_IEEE
6159                                  : ISD::DELETED_NODE;
6160 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6161 (OrAndOpcode == ISD::OR)) ||
6162 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6163 (OrAndOpcode == ISD::AND)))
6164 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6165 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6166                                      isFMAXNUMFMINNUM_IEEE
6167                                  ? ISD::FMAXNUM_IEEE
6168                                  : ISD::DELETED_NODE;
6169 return ISD::DELETED_NODE;
6170}
6171
6172static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6173  using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6174  assert(
6175 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6176 "Invalid Op to combine SETCC with");
6177
6178 // TODO: Search past casts/truncates.
6179 SDValue LHS = LogicOp->getOperand(0);
6180 SDValue RHS = LogicOp->getOperand(1);
6181 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6182 !LHS->hasOneUse() || !RHS->hasOneUse())
6183 return SDValue();
6184
6185 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6186  AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6187      LogicOp, LHS.getNode(), RHS.getNode());
6188
6189 SDValue LHS0 = LHS->getOperand(0);
6190 SDValue RHS0 = RHS->getOperand(0);
6191 SDValue LHS1 = LHS->getOperand(1);
6192 SDValue RHS1 = RHS->getOperand(1);
6193 // TODO: We don't actually need a splat here, for vectors we just need the
6194 // invariants to hold for each element.
6195 auto *LHS1C = isConstOrConstSplat(LHS1);
6196 auto *RHS1C = isConstOrConstSplat(RHS1);
6197 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6198 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6199 EVT VT = LogicOp->getValueType(0);
6200 EVT OpVT = LHS0.getValueType();
6201 SDLoc DL(LogicOp);
6202
6203 // Check if the operands of an and/or operation are comparisons and if they
6204 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6205 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6206 // sequence will be replaced with min-cmp sequence:
6207 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6208 // and and-cmp-cmp will be replaced with max-cmp sequence:
6209 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6210 // The optimization does not work for `==` or `!=` .
6211 // The two comparisons should have either the same predicate or the
6212 // predicate of one of the comparisons is the opposite of the other one.
6213 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6214                               TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6215  bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6216                          TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6217 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6218 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6219 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6220 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6221 (OpVT.isFloatingPoint() &&
6222 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6223      !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6224      CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6225 CCL != ISD::SETTRUE &&
6226 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6227
6228 SDValue CommonValue, Operand1, Operand2;
6229    ISD::CondCode CC = ISD::SETCC_INVALID;
6230    if (CCL == CCR) {
6231 if (LHS0 == RHS0) {
6232 CommonValue = LHS0;
6233 Operand1 = LHS1;
6234 Operand2 = RHS1;
6235        CC = CCL;
6236      } else if (LHS1 == RHS1) {
6237 CommonValue = LHS1;
6238 Operand1 = LHS0;
6239 Operand2 = RHS0;
6240 CC = CCL;
6241 }
6242 } else {
6243 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6244 if (LHS0 == RHS1) {
6245 CommonValue = LHS0;
6246 Operand1 = LHS1;
6247 Operand2 = RHS0;
6248 CC = CCR;
6249 } else if (RHS0 == LHS1) {
6250 CommonValue = LHS1;
6251 Operand1 = LHS0;
6252 Operand2 = RHS1;
6253 CC = CCL;
6254 }
6255 }
6256
6257 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6258 // handle it using OR/AND.
6259    if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6260      CC = ISD::SETCC_INVALID;
6261    else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6262      CC = ISD::SETCC_INVALID;
6263
6264 if (CC != ISD::SETCC_INVALID) {
6265 unsigned NewOpcode = ISD::DELETED_NODE;
6266 bool IsSigned = isSignedIntSetCC(CC);
6267 if (OpVT.isInteger()) {
6268 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6269 CC == ISD::SETLT || CC == ISD::SETULT);
6270 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6271 if (IsLess == IsOr)
6272 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6273 else
6274 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6275 } else if (OpVT.isFloatingPoint())
6276 NewOpcode =
6277 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6278 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6279
6280 if (NewOpcode != ISD::DELETED_NODE) {
6281 SDValue MinMaxValue =
6282 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6283 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6284 }
6285 }
6286 }
6287
6288 if (TargetPreference == AndOrSETCCFoldKind::None)
6289 return SDValue();
6290
6291 if (CCL == CCR &&
6292 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6293 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6294 const APInt &APLhs = LHS1C->getAPIntValue();
6295 const APInt &APRhs = RHS1C->getAPIntValue();
6296
6297 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6298 // case this is just a compare).
6299 if (APLhs == (-APRhs) &&
6300 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6301 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6302 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6303 // (icmp eq A, C) | (icmp eq A, -C)
6304 // -> (icmp eq Abs(A), C)
6305 // (icmp ne A, C) & (icmp ne A, -C)
6306 // -> (icmp ne Abs(A), C)
6307 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6308 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6309 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6310 } else if (TargetPreference &
6311 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6312
6313 // AndOrSETCCFoldKind::AddAnd:
6314 // A == C0 | A == C1
6315 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6316 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6317 // A != C0 & A != C1
6318 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6319 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
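 // E.g. for A == 4 | A == 12: smin = 4, smax = 12, and smax - smin = 8 is a
 // power of 2, so this becomes ((A - 4) & ~8) == 0, which holds exactly for
 // A == 4 and A == 12.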
6320
6321 // AndOrSETCCFoldKind::NotAnd:
6322 // A == C0 | A == C1
6323 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6324 // -> ~A & smin(C0, C1) == 0
6325 // A != C0 & A != C1
6326 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6327 // -> ~A & smin(C0, C1) != 0
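 // E.g. for A == -1 | A == -5: smax = -1 and smax - smin = 4 is a power of 2,
 // so this becomes (~A & -5) == 0, which holds exactly for A == -1 and A == -5.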
6328
6329 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6330 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6331 APInt Dif = MaxC - MinC;
6332 if (!Dif.isZero() && Dif.isPowerOf2()) {
6333 if (MaxC.isAllOnes() &&
6334 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6335 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6336 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6337 DAG.getConstant(MinC, DL, OpVT));
6338 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6339 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6340 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6341
6342 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6343 DAG.getConstant(-MinC, DL, OpVT));
6344 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6345 DAG.getConstant(~Dif, DL, OpVT));
6346 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6347 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6348 }
6349 }
6350 }
6351 }
6352
6353 return SDValue();
6354}
6355
6356// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6357// We canonicalize to the `select` form in the middle end, but the `and` form
6358 // gets better codegen on all tested targets (arm, x86, riscv).
6359 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6360 const SDLoc &DL, SelectionDAG &DAG) {
6361 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6362 if (!isNullConstant(F))
6363 return SDValue();
6364
6365 EVT CondVT = Cond.getValueType();
6366 if (TLI.getBooleanContents(CondVT) !=
6367 TargetLowering::ZeroOrOneBooleanContent)
6368 return SDValue();
6369
6370 if (T.getOpcode() != ISD::AND)
6371 return SDValue();
6372
6373 if (!isOneConstant(T.getOperand(1)))
6374 return SDValue();
6375
6376 EVT OpVT = T.getValueType();
6377
6378 SDValue CondMask =
6379 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6380 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6381}
6382
6383/// This contains all DAGCombine rules which reduce two values combined by
6384/// an And operation to a single value. This makes them reusable in the context
6385/// of visitSELECT(). Rules involving constants are not included as
6386/// visitSELECT() already handles those cases.
6387SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6388 EVT VT = N1.getValueType();
6389 SDLoc DL(N);
6390
6391 // fold (and x, undef) -> 0
6392 if (N0.isUndef() || N1.isUndef())
6393 return DAG.getConstant(0, DL, VT);
6394
6395 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6396 return V;
6397
6398 // Canonicalize:
6399 // and(x, add) -> and(add, x)
6400 if (N1.getOpcode() == ISD::ADD)
6401 std::swap(N0, N1);
6402
6403 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6404 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6405 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6406 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6407 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6408 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
6409 // immediate for an add, but becomes legal once its top c2 bits are set,
6410 // transform the ADD so the immediate doesn't need to be materialized
6411 // in a register.
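 // Setting the top c2 bits of the add constant is safe: those bits of the sum
 // are masked away by (lshr y, c2), which has its top c2 bits clear, and
 // changing only the high bits of the constant cannot alter the low bits of
 // the sum.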
6412 APInt ADDC = ADDI->getAPIntValue();
6413 APInt SRLC = SRLI->getAPIntValue();
6414 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6415 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6416 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6417 SRLC.getZExtValue());
6418 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6419 ADDC |= Mask;
6420 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6421 SDLoc DL0(N0);
6422 SDValue NewAdd =
6423 DAG.getNode(ISD::ADD, DL0, VT,
6424 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6425 CombineTo(N0.getNode(), NewAdd);
6426 // Return N so it doesn't get rechecked!
6427 return SDValue(N, 0);
6428 }
6429 }
6430 }
6431 }
6432 }
6433 }
6434
6435 return SDValue();
6436}
6437
6438bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6439 EVT LoadResultTy, EVT &ExtVT) {
6440 if (!AndC->getAPIntValue().isMask())
6441 return false;
6442
6443 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6444
6445 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6446 EVT LoadedVT = LoadN->getMemoryVT();
6447
6448 if (ExtVT == LoadedVT &&
6449 (!LegalOperations ||
6450 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6451 // ZEXTLOAD will match without needing to change the size of the value being
6452 // loaded.
6453 return true;
6454 }
6455
6456 // Do not change the width of volatile or atomic loads.
6457 if (!LoadN->isSimple())
6458 return false;
6459
6460 // Do not generate loads of non-round integer types since these can
6461 // be expensive (and would be wrong if the type is not byte sized).
6462 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6463 return false;
6464
6465 if (LegalOperations &&
6466 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6467 return false;
6468
6469 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6470 return false;
6471
6472 return true;
6473}
6474
6475bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6476 ISD::LoadExtType ExtType, EVT &MemVT,
6477 unsigned ShAmt) {
6478 if (!LDST)
6479 return false;
6480 // Only allow byte offsets.
6481 if (ShAmt % 8)
6482 return false;
6483
6484 // Do not generate loads of non-round integer types since these can
6485 // be expensive (and would be wrong if the type is not byte sized).
6486 if (!MemVT.isRound())
6487 return false;
6488
6489 // Don't change the width of volatile or atomic loads.
6490 if (!LDST->isSimple())
6491 return false;
6492
6493 EVT LdStMemVT = LDST->getMemoryVT();
6494
6495 // Bail out when changing the scalable property, since we can't be sure that
6496 // we're actually narrowing here.
6497 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6498 return false;
6499
6500 // Verify that we are actually reducing a load width here.
6501 if (LdStMemVT.bitsLT(MemVT))
6502 return false;
6503
6504 // Ensure that this isn't going to produce an unsupported memory access.
6505 if (ShAmt) {
6506 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6507 const unsigned ByteShAmt = ShAmt / 8;
6508 const Align LDSTAlign = LDST->getAlign();
6509 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6510 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6511 LDST->getAddressSpace(), NarrowAlign,
6512 LDST->getMemOperand()->getFlags()))
6513 return false;
6514 }
6515
6516 // It's not possible to generate a constant of extended or untyped type.
6517 EVT PtrType = LDST->getBasePtr().getValueType();
6518 if (PtrType == MVT::Untyped || PtrType.isExtended())
6519 return false;
6520
6521 if (isa<LoadSDNode>(LDST)) {
6522 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6523 // Don't transform one with multiple uses; this would require adding a new
6524 // load.
6525 if (!SDValue(Load, 0).hasOneUse())
6526 return false;
6527
6528 if (LegalOperations &&
6529 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6530 return false;
6531
6532 // For the transform to be legal, the load must produce only two values
6533 // (the value loaded and the chain). Don't transform a pre-increment
6534 // load, for example, which produces an extra value. Otherwise the
6535 // transformation is not equivalent, and the downstream logic to replace
6536 // uses gets things wrong.
6537 if (Load->getNumValues() > 2)
6538 return false;
6539
6540 // If the load that we're shrinking is an extload and we're not just
6541 // discarding the extension, we can't simply shrink the load. Bail.
6542 // TODO: It would be possible to merge the extensions in some cases.
6543 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6544 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6545 return false;
6546
6547 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6548 return false;
6549 } else {
6550 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6551 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6552 // Can't write outside the original store
6553 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6554 return false;
6555
6556 if (LegalOperations &&
6557 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6558 return false;
6559 }
6560 return true;
6561}
6562
6563bool DAGCombiner::SearchForAndLoads(SDNode *N,
6564 SmallVectorImpl<LoadSDNode*> &Loads,
6565 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6566 ConstantSDNode *Mask,
6567 SDNode *&NodeToMask) {
6568 // Recursively search for the operands, looking for loads which can be
6569 // narrowed.
6570 for (SDValue Op : N->op_values()) {
6571 if (Op.getValueType().isVector())
6572 return false;
6573
6574 // Some constants may need fixing up later if they are too large.
6575 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6576 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6577 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6578 NodesWithConsts.insert(N);
6579 continue;
6580 }
6581
6582 if (!Op.hasOneUse())
6583 return false;
6584
6585 switch(Op.getOpcode()) {
6586 case ISD::LOAD: {
6587 auto *Load = cast<LoadSDNode>(Op);
6588 EVT ExtVT;
6589 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6590 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6591
6592 // ZEXTLOAD is already small enough.
6593 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6594 ExtVT.bitsGE(Load->getMemoryVT()))
6595 continue;
6596
6597 // Use LE to convert equal sized loads to zext.
6598 if (ExtVT.bitsLE(Load->getMemoryVT()))
6599 Loads.push_back(Load);
6600
6601 continue;
6602 }
6603 return false;
6604 }
6605 case ISD::ZERO_EXTEND:
6606 case ISD::AssertZext: {
6607 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6608 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6609 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6610 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6611 Op.getOperand(0).getValueType();
6612
6613 // We can accept extending nodes if the mask is wider than, or equal in
6614 // width to, the original type.
6615 if (ExtVT.bitsGE(VT))
6616 continue;
6617 break;
6618 }
6619 case ISD::OR:
6620 case ISD::XOR:
6621 case ISD::AND:
6622 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6623 NodeToMask))
6624 return false;
6625 continue;
6626 }
6627
6628 // Allow one node which will be masked along with any loads found.
6629 if (NodeToMask)
6630 return false;
6631
6632 // Also ensure that the node to be masked only produces one data result.
6633 NodeToMask = Op.getNode();
6634 if (NodeToMask->getNumValues() > 1) {
6635 bool HasValue = false;
6636 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6637 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6638 if (VT != MVT::Glue && VT != MVT::Other) {
6639 if (HasValue) {
6640 NodeToMask = nullptr;
6641 return false;
6642 }
6643 HasValue = true;
6644 }
6645 }
6646 assert(HasValue && "Node to be masked has no data result?");
6647 }
6648 }
6649 return true;
6650}
6651
6652bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6653 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6654 if (!Mask)
6655 return false;
6656
6657 if (!Mask->getAPIntValue().isMask())
6658 return false;
6659
6660 // No need to do anything if the and directly uses a load.
6661 if (isa<LoadSDNode>(N->getOperand(0)))
6662 return false;
6663
6664 SmallVector<LoadSDNode*, 8> Loads;
6665 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6666 SDNode *FixupNode = nullptr;
6667 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6668 if (Loads.empty())
6669 return false;
6670
6671 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6672 SDValue MaskOp = N->getOperand(1);
6673
6674 // If it exists, fixup the single node we allow in the tree that needs
6675 // masking.
6676 if (FixupNode) {
6677 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6678 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6679 FixupNode->getValueType(0),
6680 SDValue(FixupNode, 0), MaskOp);
6681 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6682 if (And.getOpcode() == ISD::AND)
6683 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6684 }
6685
6686 // Narrow any constants that need it.
6687 for (auto *LogicN : NodesWithConsts) {
6688 SDValue Op0 = LogicN->getOperand(0);
6689 SDValue Op1 = LogicN->getOperand(1);
6690
6691 if (isa<ConstantSDNode>(Op0))
6692 Op0 =
6693 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6694
6695 if (isa<ConstantSDNode>(Op1))
6696 Op1 =
6697 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6698
6699 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6700 std::swap(Op0, Op1);
6701
6702 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6703 }
6704
6705 // Create narrow loads.
6706 for (auto *Load : Loads) {
6707 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6708 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6709 SDValue(Load, 0), MaskOp);
6710 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6711 if (And.getOpcode() == ISD::AND)
6712 And = SDValue(
6713 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6714 SDValue NewLoad = reduceLoadWidth(And.getNode());
6715 assert(NewLoad &&
6716 "Shouldn't be masking the load if it can't be narrowed");
6717 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6718 }
6719 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6720 return true;
6721 }
6722 return false;
6723}
6724
6725// Unfold
6726// x & (-1 'logical shift' y)
6727// To
6728// (x 'opposite logical shift' y) 'logical shift' y
6729// if it is better for performance.
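// E.g. x & (-1 << y) becomes (x >> y) << y (clearing the low y bits), and
// x & (-1 >> y) becomes (x << y) >> y (clearing the high y bits), so the
// variable mask never has to be materialized.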
6730SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6731 assert(N->getOpcode() == ISD::AND);
6732
6733 SDValue N0 = N->getOperand(0);
6734 SDValue N1 = N->getOperand(1);
6735
6736 // Do we actually prefer shifts over mask?
6737 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6738 return SDValue();
6739
6740 // Try to match (-1 '[outer] logical shift' y)
6741 unsigned OuterShift;
6742 unsigned InnerShift; // The opposite direction to the OuterShift.
6743 SDValue Y; // Shift amount.
6744 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6745 if (!M.hasOneUse())
6746 return false;
6747 OuterShift = M->getOpcode();
6748 if (OuterShift == ISD::SHL)
6749 InnerShift = ISD::SRL;
6750 else if (OuterShift == ISD::SRL)
6751 InnerShift = ISD::SHL;
6752 else
6753 return false;
6754 if (!isAllOnesConstant(M->getOperand(0)))
6755 return false;
6756 Y = M->getOperand(1);
6757 return true;
6758 };
6759
6760 SDValue X;
6761 if (matchMask(N1))
6762 X = N0;
6763 else if (matchMask(N0))
6764 X = N1;
6765 else
6766 return SDValue();
6767
6768 SDLoc DL(N);
6769 EVT VT = N->getValueType(0);
6770
6771 // tmp = x 'opposite logical shift' y
6772 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6773 // ret = tmp 'logical shift' y
6774 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6775
6776 return T1;
6777}
6778
6779/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6780/// For a target with a bit test, this is expected to become test + set and save
6781/// at least 1 instruction.
6782 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6783 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6784
6785 // Look through an optional extension.
6786 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6787 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6788 And0 = And0.getOperand(0);
6789 if (!isOneConstant(And1) || !And0.hasOneUse())
6790 return SDValue();
6791
6792 SDValue Src = And0;
6793
6794 // Attempt to find a 'not' op.
6795 // TODO: Should we favor test+set even without the 'not' op?
6796 bool FoundNot = false;
6797 if (isBitwiseNot(Src)) {
6798 FoundNot = true;
6799 Src = Src.getOperand(0);
6800
6801 // Look through an optional truncation. The source operand may not be the
6802 // same type as the original 'and', but that is ok because we are masking
6803 // off everything but the low bit.
6804 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6805 Src = Src.getOperand(0);
6806 }
6807
6808 // Match a shift-right by constant.
6809 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6810 return SDValue();
6811
6812 // This is probably not worthwhile without a supported type.
6813 EVT SrcVT = Src.getValueType();
6814 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6815 if (!TLI.isTypeLegal(SrcVT))
6816 return SDValue();
6817
6818 // We might have looked through casts that make this transform invalid.
6819 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6820 SDValue ShiftAmt = Src.getOperand(1);
6821 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6822 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6823 return SDValue();
6824
6825 // Set source to shift source.
6826 Src = Src.getOperand(0);
6827
6828 // Try again to find a 'not' op.
6829 // TODO: Should we favor test+set even with two 'not' ops?
6830 if (!FoundNot) {
6831 if (!isBitwiseNot(Src))
6832 return SDValue();
6833 Src = Src.getOperand(0);
6834 }
6835
6836 if (!TLI.hasBitTest(Src, ShiftAmt))
6837 return SDValue();
6838
6839 // Turn this into a bit-test pattern using mask op + setcc:
6840 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6841 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
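 // The 'not' form is equivalent because bit 0 of (not (srl X, C)) is the
 // complement of bit C of X, so the 'and ..., 1' is 1 exactly when bit C of X
 // is clear, i.e. when (X & (1<<C)) == 0.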
6842 SDLoc DL(And);
6843 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6844 EVT CCVT =
6845 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6846 SDValue Mask = DAG.getConstant(
6847 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6848 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6849 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6850 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6851 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6852}
6853
6854/// For targets that support usubsat, match a bit-hack form of that operation
6855/// that ends in 'and' and convert it.
6856 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6857 EVT VT = N->getValueType(0);
6858 unsigned BitWidth = VT.getScalarSizeInBits();
6859 APInt SignMask = APInt::getSignMask(BitWidth);
6860
6861 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6862 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6863 // xor/add with SMIN (signmask) are logically equivalent.
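 // For i8: (X s>> 7) is all-ones exactly when X >= 128, so the 'and' yields
 // X - 128 (i.e. X ^ 128) when X >= 128 and 0 otherwise, which is
 // usubsat(X, 128).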
6864 SDValue X;
6865 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6866 m_OneUse(m_Sra(m_Deferred(X),
6867 m_SpecificInt(BitWidth - 1))))) &&
6868 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6869 m_OneUse(m_Sra(m_Deferred(X),
6870 m_SpecificInt(BitWidth - 1))))))
6871 return SDValue();
6872
6873 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6874 DAG.getConstant(SignMask, DL, VT));
6875}
6876
6877/// Given a bitwise logic operation N with a matching bitwise logic operand,
6878/// fold a pattern where 2 of the source operands are identically shifted
6879/// values. For example:
6880/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6881 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6882 SelectionDAG &DAG) {
6883 unsigned LogicOpcode = N->getOpcode();
6884 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6885 "Expected bitwise logic operation");
6886
6887 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6888 return SDValue();
6889
6890 // Match another bitwise logic op and a shift.
6891 unsigned ShiftOpcode = ShiftOp.getOpcode();
6892 if (LogicOp.getOpcode() != LogicOpcode ||
6893 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6894 ShiftOpcode == ISD::SRA))
6895 return SDValue();
6896
6897 // Match another shift op inside the first logic operand. Handle both commuted
6898 // possibilities.
6899 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6900 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6901 SDValue X1 = ShiftOp.getOperand(0);
6902 SDValue Y = ShiftOp.getOperand(1);
6903 SDValue X0, Z;
6904 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6905 LogicOp.getOperand(0).getOperand(1) == Y) {
6906 X0 = LogicOp.getOperand(0).getOperand(0);
6907 Z = LogicOp.getOperand(1);
6908 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6909 LogicOp.getOperand(1).getOperand(1) == Y) {
6910 X0 = LogicOp.getOperand(1).getOperand(0);
6911 Z = LogicOp.getOperand(0);
6912 } else {
6913 return SDValue();
6914 }
6915
6916 EVT VT = N->getValueType(0);
6917 SDLoc DL(N);
6918 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6919 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6920 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6921}
6922
6923/// Given a tree of logic operations with shape like
6924/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6925/// try to match and fold shift operations with the same shift amount.
6926/// For example:
6927/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6928/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6929 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6930 SDValue RightHand, SelectionDAG &DAG) {
6931 unsigned LogicOpcode = N->getOpcode();
6932 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6933 "Expected bitwise logic operation");
6934 if (LeftHand.getOpcode() != LogicOpcode ||
6935 RightHand.getOpcode() != LogicOpcode)
6936 return SDValue();
6937 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6938 return SDValue();
6939
6940 // Try to match one of following patterns:
6941 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6942 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6943 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6944 // itself.
6945 SDValue CombinedShifts, W;
6946 SDValue R0 = RightHand.getOperand(0);
6947 SDValue R1 = RightHand.getOperand(1);
6948 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6949 W = R1;
6950 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6951 W = R0;
6952 else
6953 return SDValue();
6954
6955 EVT VT = N->getValueType(0);
6956 SDLoc DL(N);
6957 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6958}
6959
6960SDValue DAGCombiner::visitAND(SDNode *N) {
6961 SDValue N0 = N->getOperand(0);
6962 SDValue N1 = N->getOperand(1);
6963 EVT VT = N1.getValueType();
6964 SDLoc DL(N);
6965
6966 // x & x --> x
6967 if (N0 == N1)
6968 return N0;
6969
6970 // fold (and c1, c2) -> c1&c2
6971 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6972 return C;
6973
6974 // canonicalize constant to RHS
6975 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6976 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6977 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6978
6979 if (areBitwiseNotOfEachother(N0, N1))
6980 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6981
6982 // fold vector ops
6983 if (VT.isVector()) {
6984 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6985 return FoldedVOp;
6986
6987 // fold (and x, 0) -> 0, vector edition
6988 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6989 // do not return N1, because undef node may exist in N1
6990 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL,
6991 N1.getValueType());
6992
6993 // fold (and x, -1) -> x, vector edition
6994 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6995 return N0;
6996
6997 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6998 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6999 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
7000 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
7001 N1.hasOneUse()) {
7002 EVT LoadVT = MLoad->getMemoryVT();
7003 EVT ExtVT = VT;
7004 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
7005 // For this AND to be a zero extension of the masked load, the elements
7006 // of the BuildVec must mask the bottom bits of the extended element
7007 // type.
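 // E.g. a masked EXTLOAD of i8 elements extended to i32, ANDed with a splat
 // of 255, can instead zero-extend in the load itself.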
7008 uint64_t ElementSize =
7009 LoadVT.getVectorElementType().getScalarSizeInBits();
7010 if (Splat->getAPIntValue().isMask(ElementSize)) {
7011 SDValue NewLoad = DAG.getMaskedLoad(
7012 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
7013 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
7014 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7015 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7016 bool LoadHasOtherUsers = !N0.hasOneUse();
7017 CombineTo(N, NewLoad);
7018 if (LoadHasOtherUsers)
7019 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7020 return SDValue(N, 0);
7021 }
7022 }
7023 }
7024 }
7025
7026 // fold (and x, -1) -> x
7027 if (isAllOnesConstant(N1))
7028 return N0;
7029
7030 // if (and x, c) is known to be zero, return 0
7031 unsigned BitWidth = VT.getScalarSizeInBits();
7032 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7033 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7034 return DAG.getConstant(0, DL, VT);
7035
7036 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7037 return R;
7038
7039 if (SDValue NewSel = foldBinOpIntoSelect(N))
7040 return NewSel;
7041
7042 // reassociate and
7043 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7044 return RAND;
7045
7046 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7047 if (SDValue SD =
7048 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7049 return SD;
7050
7051 // fold (and (or x, C), D) -> D if (C & D) == D
7052 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7053 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7054 };
7055 if (N0.getOpcode() == ISD::OR &&
7056 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7057 return N1;
7058
7059 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7060 SDValue N0Op0 = N0.getOperand(0);
7061 EVT SrcVT = N0Op0.getValueType();
7062 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7063 APInt Mask = ~N1C->getAPIntValue();
7064 Mask = Mask.trunc(SrcBitWidth);
7065
7066 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7067 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7068 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7069
7070 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7071 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7072 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7073 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7074 TLI.isNarrowingProfitable(VT, SrcVT))
7075 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7076 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7077 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7078 }
7079
7080 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7081 if (ISD::isExtOpcode(N0.getOpcode())) {
7082 unsigned ExtOpc = N0.getOpcode();
7083 SDValue N0Op0 = N0.getOperand(0);
7084 if (N0Op0.getOpcode() == ISD::AND &&
7085 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7086 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7087 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7088 N0->hasOneUse() && N0Op0->hasOneUse()) {
7089 SDValue NewMask =
7090 DAG.getNode(ISD::AND, DL, VT, N1,
7091 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7092 return DAG.getNode(ISD::AND, DL, VT,
7093 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7094 NewMask);
7095 }
7096 }
7097
7098 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7099 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7100 // already be zero by virtue of the width of the base type of the load.
7101 //
7102 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7103 // more cases.
7104 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7106 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7107 N0.getOperand(0).getResNo() == 0) ||
7108 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7109 auto *Load =
7110 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7111
7112 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7113 // This can be a pure constant or a vector splat, in which case we treat the
7114 // vector as a scalar and use the splat value.
7115 APInt Constant;
7116 if (ConstantSDNode *C = isConstOrConstSplat(
7117 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7118 Constant = C->getAPIntValue();
7119 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7120 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7121 APInt SplatValue, SplatUndef;
7122 unsigned SplatBitSize;
7123 bool HasAnyUndefs;
7124 // Endianness should not matter here. Code below makes sure that we only
7125 // use the result if the SplatBitSize is a multiple of the vector element
7126 // size. And after that we AND all element sized parts of the splat
7127 // together. So the end result should be the same regardless of in which
7128 // order we do those operations.
7129 const bool IsBigEndian = false;
7130 bool IsSplat =
7131 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7132 HasAnyUndefs, EltBitWidth, IsBigEndian);
7133
7134 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7135 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7136 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7137 // Undef bits can contribute to a possible optimisation if set, so
7138 // set them.
7139 SplatValue |= SplatUndef;
7140
7141 // The splat value may be something like "0x00FFFFFF", which means 0 for
7142 // the first vector value and FF for the rest, repeating. We need a mask
7143 // that will apply equally to all members of the vector, so AND all the
7144 // lanes of the constant together.
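 // E.g. with 8-bit elements and a 32-bit splat value of 0x00FFFFFF, the
 // byte-sized parts 0xFF, 0xFF, 0xFF and 0x00 AND together to 0x00.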
7145 Constant = APInt::getAllOnes(EltBitWidth);
7146 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7147 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7148 }
7149 }
7150
7151 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7152 // actually legal and isn't going to get expanded, else this is a false
7153 // optimisation.
7154 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7155 Load->getValueType(0),
7156 Load->getMemoryVT());
7157
7158 // Resize the constant to the same size as the original memory access before
7159 // extension. If it is still the AllOnesValue then this AND is completely
7160 // unneeded.
7161 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7162
7163 bool B;
7164 switch (Load->getExtensionType()) {
7165 default: B = false; break;
7166 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7167 case ISD::ZEXTLOAD:
7168 case ISD::NON_EXTLOAD: B = true; break;
7169 }
7170
7171 if (B && Constant.isAllOnes()) {
7172 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7173 // preserve semantics once we get rid of the AND.
7174 SDValue NewLoad(Load, 0);
7175
7176 // Fold the AND away. NewLoad may get replaced immediately.
7177 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7178
7179 if (Load->getExtensionType() == ISD::EXTLOAD) {
7180 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7181 Load->getValueType(0), SDLoc(Load),
7182 Load->getChain(), Load->getBasePtr(),
7183 Load->getOffset(), Load->getMemoryVT(),
7184 Load->getMemOperand());
7185 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7186 if (Load->getNumValues() == 3) {
7187 // PRE/POST_INC loads have 3 values.
7188 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7189 NewLoad.getValue(2) };
7190 CombineTo(Load, To, 3, true);
7191 } else {
7192 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7193 }
7194 }
7195
7196 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7197 }
7198 }
7199
7200 // Try to convert a constant mask AND into a shuffle clear mask.
7201 if (VT.isVector())
7202 if (SDValue Shuffle = XformToShuffleWithZero(N))
7203 return Shuffle;
7204
7205 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7206 return Combined;
7207
7208 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7209 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7210 SDValue Ext = N0.getOperand(0);
7211 EVT ExtVT = Ext->getValueType(0);
7212 SDValue Extendee = Ext->getOperand(0);
7213
7214 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7215 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7216 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7217 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7218 // => (extract_subvector (iN_zeroext v))
7219 SDValue ZeroExtExtendee =
7220 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7221
7222 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7223 N0.getOperand(1));
7224 }
7225 }
7226
7227 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7228 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7229 EVT MemVT = GN0->getMemoryVT();
7230 EVT ScalarVT = MemVT.getScalarType();
7231
7232 if (SDValue(GN0, 0).hasOneUse() &&
7233 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7234 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
7235 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7236 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7237
7238 SDValue ZExtLoad = DAG.getMaskedGather(
7239 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7240 GN0->getIndexType(), ISD::ZEXTLOAD);
7241
7242 CombineTo(N, ZExtLoad);
7243 AddToWorklist(ZExtLoad.getNode());
7244 // Avoid recheck of N.
7245 return SDValue(N, 0);
7246 }
7247 }
7248
7249 // fold (and (load x), 255) -> (zextload x, i8)
7250 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7251 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7252 if (SDValue Res = reduceLoadWidth(N))
7253 return Res;
7254
7255 if (LegalTypes) {
7256 // Attempt to propagate the AND back up to the leaves which, if they're
7257 // loads, can be combined to narrow loads and the AND node can be removed.
7258 // Perform after legalization so that extend nodes will already be
7259 // combined into the loads.
7260 if (BackwardsPropagateMask(N))
7261 return SDValue(N, 0);
7262 }
7263
7264 if (SDValue Combined = visitANDLike(N0, N1, N))
7265 return Combined;
7266
7267 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7268 if (N0.getOpcode() == N1.getOpcode())
7269 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7270 return V;
7271
7272 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7273 return R;
7274 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7275 return R;
7276
7277 // Masking the negated extension of a boolean is just the zero-extended
7278 // boolean:
7279 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7280 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7281 //
7282 // Note: the SimplifyDemandedBits fold below can make an information-losing
7283 // transform, and then we have no way to find this better fold.
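 // This holds because zext(bool X) is 0 or 1, so (sub 0, zext(bool X)) is
 // 0 or -1, and masking with 1 gives back 0 or 1, i.e. zext(bool X). The
 // sext case is analogous since sext(bool X) is 0 or -1.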
7284 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7285 if (isNullOrNullSplat(N0.getOperand(0))) {
7286 SDValue SubRHS = N0.getOperand(1);
7287 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7288 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7289 return SubRHS;
7290 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7291 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7292 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7293 }
7294 }
7295
7296 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7297 // fold (and (sra)) -> (and (srl)) when possible.
7298 if (SimplifyDemandedBits(SDValue(N, 0)))
7299 return SDValue(N, 0);
7300
7301 // fold (zext_inreg (extload x)) -> (zextload x)
7302 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7303 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7304 (ISD::isEXTLoad(N0.getNode()) ||
7305 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7306 auto *LN0 = cast<LoadSDNode>(N0);
7307 EVT MemVT = LN0->getMemoryVT();
7308 // If we zero all the possible extended bits, then we can turn this into
7309 // a zextload if we are running before legalize or the operation is legal.
7310 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7311 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7312 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7313 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7314 ((!LegalOperations && LN0->isSimple()) ||
7315 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7316 SDValue ExtLoad =
7317 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7318 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7319 AddToWorklist(N);
7320 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7321 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7322 }
7323 }
7324
7325 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7326 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7327 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7328 N0.getOperand(1), false))
7329 return BSwap;
7330 }
7331
7332 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7333 return Shifts;
7334
7335 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7336 return V;
7337
7338 // Recognize the following pattern:
7339 //
7340 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7341 //
7342 // where bitmask is a mask that clears the upper bits of AndVT. The
7343 // number of bits in bitmask must be a power of two.
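 // E.g. (and (sign_extend i8 X to i32), 0xFF) is equivalent to
 // (zero_extend i8 X to i32): the mask discards the copies of the sign bit.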
7344 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7345 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7346 return false;
7347
7348 auto *C = dyn_cast<ConstantSDNode>(RHS);
7349 if (!C)
7350 return false;
7351
7352 if (!C->getAPIntValue().isMask(
7353 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7354 return false;
7355
7356 return true;
7357 };
7358
7359 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7360 if (IsAndZeroExtMask(N0, N1))
7361 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7362
7363 if (hasOperation(ISD::USUBSAT, VT))
7364 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7365 return V;
7366
7367 // Postpone until legalization completed to avoid interference with bswap
7368 // folding
7369 if (LegalOperations || VT.isVector())
7370 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7371 return R;
7372
7373 return SDValue();
7374}
7375
7376/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7377SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7378 bool DemandHighBits) {
7379 if (!LegalOperations)
7380 return SDValue();
7381
7382 EVT VT = N->getValueType(0);
7383 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7384 return SDValue();
7385 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7386 return SDValue();
7387
7388 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7389 bool LookPassAnd0 = false;
7390 bool LookPassAnd1 = false;
7391 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7392 std::swap(N0, N1);
7393 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7394 std::swap(N0, N1);
7395 if (N0.getOpcode() == ISD::AND) {
7396 if (!N0->hasOneUse())
7397 return SDValue();
7398 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7399 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7400 // This is needed for X86.
7401 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7402 N01C->getZExtValue() != 0xFFFF))
7403 return SDValue();
7404 N0 = N0.getOperand(0);
7405 LookPassAnd0 = true;
7406 }
7407
7408 if (N1.getOpcode() == ISD::AND) {
7409 if (!N1->hasOneUse())
7410 return SDValue();
7411 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7412 if (!N11C || N11C->getZExtValue() != 0xFF)
7413 return SDValue();
7414 N1 = N1.getOperand(0);
7415 LookPassAnd1 = true;
7416 }
7417
7418 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7419 std::swap(N0, N1);
7420 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7421 return SDValue();
7422 if (!N0->hasOneUse() || !N1->hasOneUse())
7423 return SDValue();
7424
7425 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7426 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7427 if (!N01C || !N11C)
7428 return SDValue();
7429 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7430 return SDValue();
7431
7432 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7433 SDValue N00 = N0->getOperand(0);
7434 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7435 if (!N00->hasOneUse())
7436 return SDValue();
7437 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7438 if (!N001C || N001C->getZExtValue() != 0xFF)
7439 return SDValue();
7440 N00 = N00.getOperand(0);
7441 LookPassAnd0 = true;
7442 }
7443
7444 SDValue N10 = N1->getOperand(0);
7445 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7446 if (!N10->hasOneUse())
7447 return SDValue();
7448 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7449 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7450 // for X86.
7451 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7452 N101C->getZExtValue() != 0xFFFF))
7453 return SDValue();
7454 N10 = N10.getOperand(0);
7455 LookPassAnd1 = true;
7456 }
7457
7458 if (N00 != N10)
7459 return SDValue();
7460
7461 // Make sure everything beyond the low halfword gets set to zero since the SRL
7462 // 16 will clear the top bits.
7463 unsigned OpSizeInBits = VT.getSizeInBits();
7464 if (OpSizeInBits > 16) {
7465 // If the left-shift isn't masked out then the only way this is a bswap is
7466 // if all bits beyond the low 8 are 0. In that case the entire pattern
7467 // reduces to a left shift anyway: leave it for other parts of the combiner.
7468 if (DemandHighBits && !LookPassAnd0)
7469 return SDValue();
7470
7471 // However, if the right shift isn't masked out then it might be because
7472 // it's not needed. See if we can spot that too. If the high bits aren't
7473 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7474 // upper bits to be zero.
7475 if (!LookPassAnd1) {
7476 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7477 if (!DAG.MaskedValueIsZero(N10,
7478 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7479 return SDValue();
7480 }
7481 }
7482
7483 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7484 if (OpSizeInBits > 16) {
7485 SDLoc DL(N);
7486 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7487 DAG.getConstant(OpSizeInBits - 16, DL,
7488 getShiftAmountTy(VT)));
7489 }
7490 return Res;
7491}
7492
7493/// Return true if the specified node is an element that makes up a 32-bit
7494/// packed halfword byteswap.
7495/// ((x & 0x000000ff) << 8) |
7496/// ((x & 0x0000ff00) >> 8) |
7497/// ((x & 0x00ff0000) << 8) |
7498/// ((x & 0xff000000) >> 8)
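/// A matched element records its source node in Parts[MaskByteOffset] so the
/// caller can check that all four bytes come from the same value.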
7499 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7500 if (!N->hasOneUse())
7501 return false;
7502
7503 unsigned Opc = N.getOpcode();
7504 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7505 return false;
7506
7507 SDValue N0 = N.getOperand(0);
7508 unsigned Opc0 = N0.getOpcode();
7509 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7510 return false;
7511
7512 ConstantSDNode *N1C = nullptr;
7513 // SHL or SRL: look upstream for AND mask operand
7514 if (Opc == ISD::AND)
7515 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7516 else if (Opc0 == ISD::AND)
7517 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7518 if (!N1C)
7519 return false;
7520
7521 unsigned MaskByteOffset;
7522 switch (N1C->getZExtValue()) {
7523 default:
7524 return false;
7525 case 0xFF: MaskByteOffset = 0; break;
7526 case 0xFF00: MaskByteOffset = 1; break;
7527 case 0xFFFF:
7528 // In case demanded bits didn't clear the bits that will be shifted out.
7529 // This is needed for X86.
7530 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7531 MaskByteOffset = 1;
7532 break;
7533 }
7534 return false;
7535 case 0xFF0000: MaskByteOffset = 2; break;
7536 case 0xFF000000: MaskByteOffset = 3; break;
7537 }
7538
7539 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7540 if (Opc == ISD::AND) {
7541 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7542 // (x >> 8) & 0xff
7543 // (x >> 8) & 0xff0000
7544 if (Opc0 != ISD::SRL)
7545 return false;
7546 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7547 if (!C || C->getZExtValue() != 8)
7548 return false;
7549 } else {
7550 // (x << 8) & 0xff00
7551 // (x << 8) & 0xff000000
7552 if (Opc0 != ISD::SHL)
7553 return false;
7554 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7555 if (!C || C->getZExtValue() != 8)
7556 return false;
7557 }
7558 } else if (Opc == ISD::SHL) {
7559 // (x & 0xff) << 8
7560 // (x & 0xff0000) << 8
7561 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7562 return false;
7563 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7564 if (!C || C->getZExtValue() != 8)
7565 return false;
7566 } else { // Opc == ISD::SRL
7567 // (x & 0xff00) >> 8
7568 // (x & 0xff000000) >> 8
7569 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7570 return false;
7571 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7572 if (!C || C->getZExtValue() != 8)
7573 return false;
7574 }
7575
7576 if (Parts[MaskByteOffset])
7577 return false;
7578
7579 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7580 return true;
7581}
7582
7583// Match 2 elements of a packed halfword bswap.
7584 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7585 if (N.getOpcode() == ISD::OR)
7586 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7587 isBSwapHWordElement(N.getOperand(1), Parts);
7588
7589 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7590 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7591 if (!C || C->getAPIntValue() != 16)
7592 return false;
7593 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7594 return true;
7595 }
7596
7597 return false;
7598}
7599
7600// Match this pattern:
7601// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7602// And rewrite this to:
7603// (rotr (bswap A), 16)
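// For A = [b3 b2 b1 b0], both sides produce [b2 b3 b0 b1]: the masked shifts
// swap the bytes within each halfword, and rotating bswap(A) = [b0 b1 b2 b3]
// right by 16 gives the same result.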
7604 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7605 SelectionDAG &DAG, SDNode *N, SDValue N0,
7606 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7607 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7608 "MatchBSwapHWordOrAndAnd: expecting i32");
7609 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7610 return SDValue();
7611 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7612 return SDValue();
7613 // TODO: this is too restrictive; lifting this restriction requires more tests
7614 if (!N0->hasOneUse() || !N1->hasOneUse())
7615 return SDValue();
7616 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7617 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7618 if (!Mask0 || !Mask1)
7619 return SDValue();
7620 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7621 Mask1->getAPIntValue() != 0x00ff00ff)
7622 return SDValue();
7623 SDValue Shift0 = N0.getOperand(0);
7624 SDValue Shift1 = N1.getOperand(0);
7625 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7626 return SDValue();
7627 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7628 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7629 if (!ShiftAmt0 || !ShiftAmt1)
7630 return SDValue();
7631 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7632 return SDValue();
7633 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7634 return SDValue();
7635
7636 SDLoc DL(N);
7637 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7638 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7639 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7640}
7641
7642/// Match a 32-bit packed halfword bswap. That is
7643/// ((x & 0x000000ff) << 8) |
7644/// ((x & 0x0000ff00) >> 8) |
7645/// ((x & 0x00ff0000) << 8) |
7646/// ((x & 0xff000000) >> 8)
7647/// => (rotl (bswap x), 16)
7648SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7649 if (!LegalOperations)
7650 return SDValue();
7651
7652 EVT VT = N->getValueType(0);
7653 if (VT != MVT::i32)
7654 return SDValue();
7655 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7656 return SDValue();
7657
7658 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7659 getShiftAmountTy(VT)))
7660 return BSwap;
7661
7662 // Try again with commuted operands.
7663 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7664 getShiftAmountTy(VT)))
7665 return BSwap;
7666
7667
7668 // Look for either
7669 // (or (bswaphpair), (bswaphpair))
7670 // (or (or (bswaphpair), (and)), (and))
7671 // (or (or (and), (bswaphpair)), (and))
7672 SDNode *Parts[4] = {};
7673
7674 if (isBSwapHWordPair(N0, Parts)) {
7675 // (or (or (and), (and)), (or (and), (and)))
7676 if (!isBSwapHWordPair(N1, Parts))
7677 return SDValue();
7678 } else if (N0.getOpcode() == ISD::OR) {
7679 // (or (or (or (and), (and)), (and)), (and))
7680 if (!isBSwapHWordElement(N1, Parts))
7681 return SDValue();
7682 SDValue N00 = N0.getOperand(0);
7683 SDValue N01 = N0.getOperand(1);
7684 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7685 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7686 return SDValue();
7687 } else {
7688 return SDValue();
7689 }
7690
7691 // Make sure the parts are all coming from the same node.
7692 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7693 return SDValue();
7694
7695 SDLoc DL(N);
7696 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7697 SDValue(Parts[0], 0));
7698
7699 // Result of the bswap should be rotated by 16. If it's not legal, then
7700 // do (x << 16) | (x >> 16).
7701 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7702 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7703 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7704 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7705 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7706 return DAG.getNode(ISD::OR, DL, VT,
7707 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7708 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7709}
7710
7711/// This contains all DAGCombine rules which reduce two values combined by
7712/// an Or operation to a single value \see visitANDLike().
7713SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7714 EVT VT = N1.getValueType();
7715
7716 // fold (or x, undef) -> -1
7717 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7718 return DAG.getAllOnesConstant(DL, VT);
7719
7720 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7721 return V;
7722
7723 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7724 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7725 // Don't increase # computations.
7726 (N0->hasOneUse() || N1->hasOneUse())) {
7727 // We can only do this xform if we know that bits from X that are set in C2
7728 // but not in C1 are already zero. Likewise for Y.
7729 if (const ConstantSDNode *N0O1C =
7730 getAsNonOpaqueConstant(N0.getOperand(1))) {
7731 if (const ConstantSDNode *N1O1C =
7732 getAsNonOpaqueConstant(N1.getOperand(1))) {
7733 // We can only do this xform if we know that bits from X that are set in
7734 // C2 but not in C1 are already zero. Likewise for Y.
7735 const APInt &LHSMask = N0O1C->getAPIntValue();
7736 const APInt &RHSMask = N1O1C->getAPIntValue();
7737
7738 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7739 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7740 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7741 N0.getOperand(0), N1.getOperand(0));
7742 return DAG.getNode(ISD::AND, DL, VT, X,
7743 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7744 }
7745 }
7746 }
7747 }
7748
7749 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7750 if (N0.getOpcode() == ISD::AND &&
7751 N1.getOpcode() == ISD::AND &&
7752 N0.getOperand(0) == N1.getOperand(0) &&
7753 // Don't increase # computations.
7754 (N0->hasOneUse() || N1->hasOneUse())) {
7755 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7756 N0.getOperand(1), N1.getOperand(1));
7757 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7758 }
7759
7760 return SDValue();
7761}
7762
7763/// OR combines for which the commuted variant will be tried as well.
7764 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7765 SDNode *N) {
7766 EVT VT = N0.getValueType();
7767 unsigned BW = VT.getScalarSizeInBits();
7768 SDLoc DL(N);
7769
7770 auto peekThroughResize = [](SDValue V) {
7771 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7772 return V->getOperand(0);
7773 return V;
7774 };
7775
7776 SDValue N0Resized = peekThroughResize(N0);
7777 if (N0Resized.getOpcode() == ISD::AND) {
7778 SDValue N1Resized = peekThroughResize(N1);
7779 SDValue N00 = N0Resized.getOperand(0);
7780 SDValue N01 = N0Resized.getOperand(1);
7781
7782 // fold or (and x, y), x --> x
7783 if (N00 == N1Resized || N01 == N1Resized)
7784 return N1;
7785
7786 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7787 // TODO: Set AllowUndefs = true.
7788 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7789 /* AllowUndefs */ false)) {
7790 if (peekThroughResize(NotOperand) == N1Resized)
7791 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7792 N1);
7793 }
7794
7795 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7796 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7797 /* AllowUndefs */ false)) {
7798 if (peekThroughResize(NotOperand) == N1Resized)
7799 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7800 N1);
7801 }
7802 }
7803
7804 SDValue X, Y;
7805
7806 // fold or (xor X, N1), N1 --> or X, N1
7807 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7808 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7809
7810 // fold or (xor x, y), (x and/or y) --> or x, y
7811 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7812 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7813 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7814 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7815
7816 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7817 return R;
7818
7819 auto peekThroughZext = [](SDValue V) {
7820 if (V->getOpcode() == ISD::ZERO_EXTEND)
7821 return V->getOperand(0);
7822 return V;
7823 };
7824
7825 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7826 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7827 N0.getOperand(0) == N1.getOperand(0) &&
7828 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7829 return N0;
7830
7831 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7832 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7833 N0.getOperand(1) == N1.getOperand(0) &&
7834 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7835 return N0;
7836
7837 // Attempt to match a legalized build_pair-esque pattern:
7838 // or(shl(aext(Hi),BW/2),zext(Lo))
7839 SDValue Lo, Hi;
7840 if (sd_match(N0,
7841 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7842 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7843 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7844 Lo.getValueType() == Hi.getValueType()) {
7845 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7846 SDValue NotLo, NotHi;
7847 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7848 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7849 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7850 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7851 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7852 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7853 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7854 }
7855 }
7856
7857 return SDValue();
7858}
7859
7860SDValue DAGCombiner::visitOR(SDNode *N) {
7861 SDValue N0 = N->getOperand(0);
7862 SDValue N1 = N->getOperand(1);
7863 EVT VT = N1.getValueType();
7864 SDLoc DL(N);
7865
7866 // x | x --> x
7867 if (N0 == N1)
7868 return N0;
7869
7870 // fold (or c1, c2) -> c1|c2
7871 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7872 return C;
7873
7874 // canonicalize constant to RHS
7875 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7876 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7877 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7878
7879 // fold vector ops
7880 if (VT.isVector()) {
7881 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7882 return FoldedVOp;
7883
7884 // fold (or x, 0) -> x, vector edition
7885 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7886 return N0;
7887
7888 // fold (or x, -1) -> -1, vector edition
7889 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7890 // do not return N1, because undef node may exist in N1
7891 return DAG.getAllOnesConstant(DL, N1.getValueType());
7892
7893 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7894 // Do this only if the resulting type / shuffle is legal.
7895 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7896 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7897 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7898 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7899 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7900 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7901 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7902 // Ensure both shuffles have a zero input.
7903 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7904 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7905 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7906 bool CanFold = true;
7907 int NumElts = VT.getVectorNumElements();
7908 SmallVector<int, 4> Mask(NumElts, -1);
7909
7910 for (int i = 0; i != NumElts; ++i) {
7911 int M0 = SV0->getMaskElt(i);
7912 int M1 = SV1->getMaskElt(i);
7913
7914 // Determine if either index is pointing to a zero vector.
7915 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7916 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7917
7918          // If one element is zero and the other side is undef, keep undef.
7919 // This also handles the case that both are undef.
7920 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7921 continue;
7922
7923 // Make sure only one of the elements is zero.
7924 if (M0Zero == M1Zero) {
7925 CanFold = false;
7926 break;
7927 }
7928
7929 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7930
7931 // We have a zero and non-zero element. If the non-zero came from
7932 // SV0 make the index a LHS index. If it came from SV1, make it
7933 // a RHS index. We need to mod by NumElts because we don't care
7934 // which operand it came from in the original shuffles.
7935 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7936 }
7937
7938 if (CanFold) {
7939 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7940 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7941 SDValue LegalShuffle =
7942 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7943 if (LegalShuffle)
7944 return LegalShuffle;
7945 }
7946 }
7947 }
7948 }
7949
7950 // fold (or x, 0) -> x
7951 if (isNullConstant(N1))
7952 return N0;
7953
7954 // fold (or x, -1) -> -1
7955 if (isAllOnesConstant(N1))
7956 return N1;
7957
7958 if (SDValue NewSel = foldBinOpIntoSelect(N))
7959 return NewSel;
7960
7961 // fold (or x, c) -> c iff (x & ~c) == 0
7962 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7963 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7964 return N1;
7965
7966 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7967 return R;
7968
7969 if (SDValue Combined = visitORLike(N0, N1, DL))
7970 return Combined;
7971
7972 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7973 return Combined;
7974
7975 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7976 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7977 return BSwap;
7978 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7979 return BSwap;
7980
7981 // reassociate or
7982 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7983 return ROR;
7984
7985 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7986 if (SDValue SD =
7987 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7988 return SD;
7989
7990 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7991 // iff (c1 & c2) != 0 or c1/c2 are undef.
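  // Illustrative example: (or (and X, 0xF0), 0x3C) -> (and (or X, 0x3C), 0xFC),
  // since 0xF0 & 0x3C != 0 and 0xF0 | 0x3C == 0xFC.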
7992 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7993 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7994 };
7995 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7996 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7997 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7998 {N1, N0.getOperand(1)})) {
7999 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
8000 AddToWorklist(IOR.getNode());
8001 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
8002 }
8003 }
8004
8005 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
8006 return Combined;
8007 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
8008 return Combined;
8009
8010 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
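  // E.g. (illustrative): (or (zext a), (zext b)) -> (zext (or a, b)) when both
  // operands extend from the same source type.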
8011 if (N0.getOpcode() == N1.getOpcode())
8012 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8013 return V;
8014
8015 // See if this is some rotate idiom.
8016 if (SDValue Rot = MatchRotate(N0, N1, DL))
8017 return Rot;
8018
8019 if (SDValue Load = MatchLoadCombine(N))
8020 return Load;
8021
8022 // Simplify the operands using demanded-bits information.
8023  if (SimplifyDemandedBits(SDValue(N, 0)))
8024 return SDValue(N, 0);
8025
8026 // If OR can be rewritten into ADD, try combines based on ADD.
8027 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8028 DAG.isADDLike(SDValue(N, 0)))
8029 if (SDValue Combined = visitADDLike(N))
8030 return Combined;
8031
8032 // Postpone until legalization completed to avoid interference with bswap
8033 // folding
8034 if (LegalOperations || VT.isVector())
8035 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8036 return R;
8037
8038 return SDValue();
8039}
8040
8041static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8042 SDValue &Mask) {
8043 if (Op.getOpcode() == ISD::AND &&
8044 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8045 Mask = Op.getOperand(1);
8046 return Op.getOperand(0);
8047 }
8048 return Op;
8049}
8050
8051/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8052static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8053 SDValue &Mask) {
8054 Op = stripConstantMask(DAG, Op, Mask);
8055 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8056 Shift = Op;
8057 return true;
8058 }
8059 return false;
8060}
8061
8062/// Helper function for visitOR to extract the needed side of a rotate idiom
8063/// from a shl/srl/mul/udiv. This is meant to handle cases where
8064/// InstCombine merged some outside op with one of the shifts from
8065/// the rotate pattern.
8066/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8067/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8068/// patterns:
8069///
8070/// (or (add v v) (shrl v bitwidth-1)):
8071/// expands (add v v) -> (shl v 1)
8072///
8073/// (or (mul v c0) (shrl (mul v c1) c2)):
8074/// expands (mul v c0) -> (shl (mul v c1) c3)
8075///
8076/// (or (udiv v c0) (shl (udiv v c1) c2)):
8077/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8078///
8079/// (or (shl v c0) (shrl (shl v c1) c2)):
8080/// expands (shl v c0) -> (shl (shl v c1) c3)
8081///
8082/// (or (shrl v c0) (shl (shrl v c1) c2)):
8083/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8084///
8085/// Such that in all cases, c3+c2==bitwidth(op v c1).
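/// For example (illustrative, bitwidth 32):
///   (or (mul v 16) (shrl (mul v 2) 29))
/// expands (mul v 16) -> (shl (mul v 2) 3), since 16 == 2 * (1 << 3) and
/// 3 + 29 == 32, letting the caller form a rotate of (mul v 2).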
8086static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8087                                     SDValue ExtractFrom, SDValue &Mask,
8088 const SDLoc &DL) {
8089 assert(OppShift && ExtractFrom && "Empty SDValue");
8090 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8091 return SDValue();
8092
8093 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8094
8095 // Value and Type of the shift.
8096 SDValue OppShiftLHS = OppShift.getOperand(0);
8097 EVT ShiftedVT = OppShiftLHS.getValueType();
8098
8099 // Amount of the existing shift.
8100 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8101
8102 // (add v v) -> (shl v 1)
8103 // TODO: Should this be a general DAG canonicalization?
8104 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8105 ExtractFrom.getOpcode() == ISD::ADD &&
8106 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8107 ExtractFrom.getOperand(0) == OppShiftLHS &&
8108 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8109 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8110 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8111
8112 // Preconditions:
8113 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8114 //
8115 // Find opcode of the needed shift to be extracted from (op0 v c0).
8116 unsigned Opcode = ISD::DELETED_NODE;
8117 bool IsMulOrDiv = false;
8118 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8119 // opcode or its arithmetic (mul or udiv) variant.
8120 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8121 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8122 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8123 return false;
8124 Opcode = NeededShift;
8125 return true;
8126 };
8127 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8128 // that the needed shift can be extracted from.
8129 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8130 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8131 return SDValue();
8132
8133 // op0 must be the same opcode on both sides, have the same LHS argument,
8134 // and produce the same value type.
8135 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8136 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8137 ShiftedVT != ExtractFrom.getValueType())
8138 return SDValue();
8139
8140 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8141 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8142 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8143 ConstantSDNode *ExtractFromCst =
8144 isConstOrConstSplat(ExtractFrom.getOperand(1));
8145 // TODO: We should be able to handle non-uniform constant vectors for these values
8146 // Check that we have constant values.
8147 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8148 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8149 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8150 return SDValue();
8151
8152 // Compute the shift amount we need to extract to complete the rotate.
8153 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8154 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8155 return SDValue();
8156 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8157 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8158 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8159 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8160 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8161
8162 // Now try extract the needed shift from the ExtractFrom op and see if the
8163 // result matches up with the existing shift's LHS op.
8164 if (IsMulOrDiv) {
8165 // Op to extract from is a mul or udiv by a constant.
8166 // Check:
8167 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8168 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8169 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8170 NeededShiftAmt.getZExtValue());
8171 APInt ResultAmt;
8172 APInt Rem;
8173 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8174 if (Rem != 0 || ResultAmt != OppLHSAmt)
8175 return SDValue();
8176 } else {
8177 // Op to extract from is a shift by a constant.
8178 // Check:
8179 // c2 - (bitwidth(op0 v c0) - c1) == c0
8180 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8181 ExtractFromAmt.getBitWidth()))
8182 return SDValue();
8183 }
8184
8185 // Return the expanded shift op that should allow a rotate to be formed.
8186 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8187 EVT ResVT = ExtractFrom.getValueType();
8188 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8189 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8190}
8191
8192// Return true if we can prove that, whenever Neg and Pos are both in the
8193// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8194// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8195//
8196// (or (shift1 X, Neg), (shift2 X, Pos))
8197//
8198// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8199// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8200// to consider shift amounts with defined behavior.
8201//
8202// The IsRotate flag should be set when the LHS of both shifts is the same.
8203// Otherwise if matching a general funnel shift, it should be clear.
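//
// For example (illustrative): with EltSize == 32, Pos == (and Y, 31) and
// Neg == (and (sub 32, Y), 31) satisfy Neg == (Pos == 0 ? 0 : EltSize - Pos),
// so (or (shl X, Pos), (srl X, Neg)) can be treated as (rotl X, Pos).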
8204static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8205 SelectionDAG &DAG, bool IsRotate) {
8206 const auto &TLI = DAG.getTargetLoweringInfo();
8207 // If EltSize is a power of 2 then:
8208 //
8209 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8210 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8211 //
8212 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8213 // for the stronger condition:
8214 //
8215 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8216 //
8217 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8218 // we can just replace Neg with Neg' for the rest of the function.
8219 //
8220 // In other cases we check for the even stronger condition:
8221 //
8222 // Neg == EltSize - Pos [B]
8223 //
8224 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8225 // behavior if Pos == 0 (and consequently Neg == EltSize).
8226 //
8227 // We could actually use [A] whenever EltSize is a power of 2, but the
8228 // only extra cases that it would match are those uninteresting ones
8229 // where Neg and Pos are never in range at the same time. E.g. for
8230 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8231 // as well as (sub 32, Pos), but:
8232 //
8233 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8234 //
8235 // always invokes undefined behavior for 32-bit X.
8236 //
8237 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8238 // This allows us to peek through any operations that only affect Mask's
8239 // un-demanded bits.
8240 //
8241 // NOTE: We can only do this when matching operations which won't modify the
8242 // least Log2(EltSize) significant bits and not a general funnel shift.
8243 unsigned MaskLoBits = 0;
8244 if (IsRotate && isPowerOf2_64(EltSize)) {
8245 unsigned Bits = Log2_64(EltSize);
8246 unsigned NegBits = Neg.getScalarValueSizeInBits();
8247 if (NegBits >= Bits) {
8248 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8249 if (SDValue Inner =
8250              TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8251 Neg = Inner;
8252 MaskLoBits = Bits;
8253 }
8254 }
8255 }
8256
8257 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8258 if (Neg.getOpcode() != ISD::SUB)
8259 return false;
8260  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8261 if (!NegC)
8262 return false;
8263 SDValue NegOp1 = Neg.getOperand(1);
8264
8265 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8266 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8267 // are redundant for the purpose of the equality.
8268 if (MaskLoBits) {
8269 unsigned PosBits = Pos.getScalarValueSizeInBits();
8270 if (PosBits >= MaskLoBits) {
8271 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8272 if (SDValue Inner =
8273              TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8274 Pos = Inner;
8275 }
8276 }
8277 }
8278
8279 // The condition we need is now:
8280 //
8281 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8282 //
8283 // If NegOp1 == Pos then we need:
8284 //
8285 // EltSize & Mask == NegC & Mask
8286 //
8287 // (because "x & Mask" is a truncation and distributes through subtraction).
8288 //
8289 // We also need to account for a potential truncation of NegOp1 if the amount
8290 // has already been legalized to a shift amount type.
8291 APInt Width;
8292 if ((Pos == NegOp1) ||
8293 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8294 Width = NegC->getAPIntValue();
8295
8296 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8297 // Then the condition we want to prove becomes:
8298 //
8299 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8300 //
8301 // which, again because "x & Mask" is a truncation, becomes:
8302 //
8303 // NegC & Mask == (EltSize - PosC) & Mask
8304 // EltSize & Mask == (NegC + PosC) & Mask
8305 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8306 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8307 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8308 else
8309 return false;
8310 } else
8311 return false;
8312
8313 // Now we just need to check that EltSize & Mask == Width & Mask.
8314 if (MaskLoBits)
8315 // EltSize & Mask is 0 since Mask is EltSize - 1.
8316 return Width.getLoBits(MaskLoBits) == 0;
8317 return Width == EltSize;
8318}
8319
8320// A subroutine of MatchRotate used once we have found an OR of two opposite
8321// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8322// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8323// former being preferred if supported. InnerPos and InnerNeg are Pos and
8324// Neg with outer conversions stripped away.
8325SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8326 SDValue Neg, SDValue InnerPos,
8327 SDValue InnerNeg, bool HasPos,
8328 unsigned PosOpcode, unsigned NegOpcode,
8329 const SDLoc &DL) {
8330 // fold (or (shl x, (*ext y)),
8331 // (srl x, (*ext (sub 32, y)))) ->
8332 // (rotl x, y) or (rotr x, (sub 32, y))
8333 //
8334 // fold (or (shl x, (*ext (sub 32, y))),
8335 // (srl x, (*ext y))) ->
8336 // (rotr x, y) or (rotl x, (sub 32, y))
8337 EVT VT = Shifted.getValueType();
8338 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8339 /*IsRotate*/ true)) {
8340 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8341 HasPos ? Pos : Neg);
8342 }
8343
8344 return SDValue();
8345}
8346
8347// A subroutine of MatchRotate used once we have found an OR of two opposite
8348// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8349// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8350// former being preferred if supported. InnerPos and InnerNeg are Pos and
8351// Neg with outer conversions stripped away.
8352// TODO: Merge with MatchRotatePosNeg.
8353SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8354 SDValue Neg, SDValue InnerPos,
8355 SDValue InnerNeg, bool HasPos,
8356 unsigned PosOpcode, unsigned NegOpcode,
8357 const SDLoc &DL) {
8358 EVT VT = N0.getValueType();
8359 unsigned EltBits = VT.getScalarSizeInBits();
8360
8361 // fold (or (shl x0, (*ext y)),
8362 // (srl x1, (*ext (sub 32, y)))) ->
8363 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8364 //
8365 // fold (or (shl x0, (*ext (sub 32, y))),
8366 // (srl x1, (*ext y))) ->
8367 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8368 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8369 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8370 HasPos ? Pos : Neg);
8371 }
8372
8373 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8374  // so for now just use the PosOpcode case if it's legal.
8375 // TODO: When can we use the NegOpcode case?
8376 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8377 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8378 if (Op.getOpcode() != BinOpc)
8379 return false;
8380 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8381 return Cst && (Cst->getAPIntValue() == Imm);
8382 };
8383
8384 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8385 // -> (fshl x0, x1, y)
8386 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8387 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8388 InnerPos == InnerNeg.getOperand(0) &&
8389        TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8390 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8391 }
8392
8393 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8394 // -> (fshr x0, x1, y)
8395 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8396 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8397 InnerNeg == InnerPos.getOperand(0) &&
8398        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8399 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8400 }
8401
8402 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8403 // -> (fshr x0, x1, y)
8404 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8405 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8406 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8407 InnerNeg == InnerPos.getOperand(0) &&
8408        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8409 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8410 }
8411 }
8412
8413 return SDValue();
8414}
8415
8416// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8417// idioms for rotate, and if the target supports rotation instructions, generate
8418// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8419// with different shifted sources.
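// E.g. (illustrative, i32): (or (shl x, 8), (srl x, 24)) --> (rotl x, 8), and
// with distinct sources (or (shl x, 8), (srl y, 24)) --> (fshl x, y, 8).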
8420SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8421 EVT VT = LHS.getValueType();
8422
8423 // The target must have at least one rotate/funnel flavor.
8424 // We still try to match rotate by constant pre-legalization.
8425 // TODO: Support pre-legalization funnel-shift by constant.
8426 bool HasROTL = hasOperation(ISD::ROTL, VT);
8427 bool HasROTR = hasOperation(ISD::ROTR, VT);
8428 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8429 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8430
8431 // If the type is going to be promoted and the target has enabled custom
8432 // lowering for rotate, allow matching rotate by non-constants. Only allow
8433 // this for scalar types.
8434 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8435                                  TargetLowering::TypePromoteInteger) {
8436    HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8437    HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8438 }
8439
8440 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8441 return SDValue();
8442
8443 // Check for truncated rotate.
8444 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8445 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8446 assert(LHS.getValueType() == RHS.getValueType());
8447 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8448 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8449 }
8450 }
8451
8452 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8453 SDValue LHSShift; // The shift.
8454 SDValue LHSMask; // AND value if any.
8455 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8456
8457 SDValue RHSShift; // The shift.
8458 SDValue RHSMask; // AND value if any.
8459 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8460
8461 // If neither side matched a rotate half, bail
8462 if (!LHSShift && !RHSShift)
8463 return SDValue();
8464
8465 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8466 // side of the rotate, so try to handle that here. In all cases we need to
8467 // pass the matched shift from the opposite side to compute the opcode and
8468 // needed shift amount to extract. We still want to do this if both sides
8469 // matched a rotate half because one half may be a potential overshift that
8470  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
8471 // single one).
8472
8473 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8474 if (LHSShift)
8475 if (SDValue NewRHSShift =
8476 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8477 RHSShift = NewRHSShift;
8478 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8479 if (RHSShift)
8480 if (SDValue NewLHSShift =
8481 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8482 LHSShift = NewLHSShift;
8483
8484 // If a side is still missing, nothing else we can do.
8485 if (!RHSShift || !LHSShift)
8486 return SDValue();
8487
8488 // At this point we've matched or extracted a shift op on each side.
8489
8490 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8491 return SDValue(); // Shifts must disagree.
8492
8493 // Canonicalize shl to left side in a shl/srl pair.
8494 if (RHSShift.getOpcode() == ISD::SHL) {
8495 std::swap(LHS, RHS);
8496 std::swap(LHSShift, RHSShift);
8497 std::swap(LHSMask, RHSMask);
8498 }
8499
8500 // Something has gone wrong - we've lost the shl/srl pair - bail.
8501 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8502 return SDValue();
8503
8504 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8505 SDValue LHSShiftArg = LHSShift.getOperand(0);
8506 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8507 SDValue RHSShiftArg = RHSShift.getOperand(0);
8508 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8509
8510 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8511                                        ConstantSDNode *RHS) {
8512 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8513 };
8514
8515 auto ApplyMasks = [&](SDValue Res) {
8516 // If there is an AND of either shifted operand, apply it to the result.
8517 if (LHSMask.getNode() || RHSMask.getNode()) {
8518      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8519      SDValue Mask = AllOnes;
8520
8521 if (LHSMask.getNode()) {
8522 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8523 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8524 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8525 }
8526 if (RHSMask.getNode()) {
8527 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8528 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8529 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8530 }
8531
8532 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8533 }
8534
8535 return Res;
8536 };
8537
8538 // TODO: Support pre-legalization funnel-shift by constant.
8539 bool IsRotate = LHSShiftArg == RHSShiftArg;
8540 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8541 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8542 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8543 // Look for a disguised rotate by constant.
8544 // The common shifted operand X may be hidden inside another 'or'.
8545 SDValue X, Y;
8546 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8547 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8548 return false;
8549 if (CommonOp == Or.getOperand(0)) {
8550 X = CommonOp;
8551 Y = Or.getOperand(1);
8552 return true;
8553 }
8554 if (CommonOp == Or.getOperand(1)) {
8555 X = CommonOp;
8556 Y = Or.getOperand(0);
8557 return true;
8558 }
8559 return false;
8560 };
8561
8562 SDValue Res;
8563 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8564 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8565 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8566 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8567 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8568 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8569 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8570 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8571 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8572 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8573 } else {
8574 return SDValue();
8575 }
8576
8577 return ApplyMasks(Res);
8578 }
8579
8580 return SDValue(); // Requires funnel shift support.
8581 }
8582
8583 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8584 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8585 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8586 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8587 // iff C1+C2 == EltSizeInBits
8588 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8589 SDValue Res;
8590 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8591 bool UseROTL = !LegalOperations || HasROTL;
8592 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8593 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8594 } else {
8595 bool UseFSHL = !LegalOperations || HasFSHL;
8596 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8597 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8598 }
8599
8600 return ApplyMasks(Res);
8601 }
8602
8603 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8604 // shift.
8605 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8606 return SDValue();
8607
8608 // If there is a mask here, and we have a variable shift, we can't be sure
8609 // that we're masking out the right stuff.
8610 if (LHSMask.getNode() || RHSMask.getNode())
8611 return SDValue();
8612
8613 // If the shift amount is sign/zext/any-extended just peel it off.
8614 SDValue LExtOp0 = LHSShiftAmt;
8615 SDValue RExtOp0 = RHSShiftAmt;
8616 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8617 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8618 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8619 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8620 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8621 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8622 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8623 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8624 LExtOp0 = LHSShiftAmt.getOperand(0);
8625 RExtOp0 = RHSShiftAmt.getOperand(0);
8626 }
8627
8628 if (IsRotate && (HasROTL || HasROTR)) {
8629 SDValue TryL =
8630 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8631 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8632 if (TryL)
8633 return TryL;
8634
8635 SDValue TryR =
8636 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8637 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8638 if (TryR)
8639 return TryR;
8640 }
8641
8642 SDValue TryL =
8643 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8644 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8645 if (TryL)
8646 return TryL;
8647
8648 SDValue TryR =
8649 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8650 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8651 if (TryR)
8652 return TryR;
8653
8654 return SDValue();
8655}
8656
8657/// Recursively traverses the expression calculating the origin of the requested
8658/// byte of the given value. Returns std::nullopt if the provider can't be
8659/// calculated.
8660///
8661/// For all the values except the root of the expression, we verify that the
8662/// value has exactly one use and if not then return std::nullopt. This way if
8663/// the origin of the byte is returned it's guaranteed that the values which
8664/// contribute to the byte are not used outside of this expression.
8665
8666/// However, there is a special case when dealing with vector loads -- we allow
8667/// more than one use if the load is a vector type. Since the values that
8668/// contribute to the byte ultimately come from the ExtractVectorElements of the
8669/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8670/// because those operations are independent from the pattern to be combined.
8671/// For vector loads, we simply care that the ByteProviders are adjacent
8672/// positions of the same vector, and their index matches the byte that is being
8673/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8674/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8675/// byte position we are trying to provide for the LoadCombine. If these do
8676/// not match, then we cannot combine the vector loads. \p Index uses the
8677/// byte position we are trying to provide for and is matched against the
8678/// shl and load size. The \p Index algorithm ensures the requested byte is
8679/// provided for by the pattern, and the pattern does not over-provide bytes.
8680///
8681///
8682/// The supported LoadCombine pattern for vector loads is as follows
8683/// or
8684/// / \
8685/// or shl
8686/// / \ |
8687/// or shl zext
8688/// / \ | |
8689/// shl zext zext EVE*
8690/// | | | |
8691/// zext EVE* EVE* LOAD
8692/// | | |
8693/// EVE* LOAD LOAD
8694/// |
8695/// LOAD
8696///
8697/// *ExtractVectorElement
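///
/// For illustration, a scalar little-endian i32 pattern such as
///   (or (zext (load i8 p)), (shl (zext (load i8 p+1)), 8))
/// reports byte 0 as coming from (load p), byte 1 from (load p+1), and
/// bytes 2-3 as constant zero.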
8698using SDByteProvider = ByteProvider<SDNode *>;
8699
8700static std::optional<SDByteProvider>
8701calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8702 std::optional<uint64_t> VectorIndex,
8703 unsigned StartingIndex = 0) {
8704
8705 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8706 if (Depth == 10)
8707 return std::nullopt;
8708
8709 // Only allow multiple uses if the instruction is a vector load (in which
8710 // case we will use the load for every ExtractVectorElement)
8711 if (Depth && !Op.hasOneUse() &&
8712 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8713 return std::nullopt;
8714
8715 // Fail to combine if we have encountered anything but a LOAD after handling
8716 // an ExtractVectorElement.
8717 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8718 return std::nullopt;
8719
8720 unsigned BitWidth = Op.getValueSizeInBits();
8721 if (BitWidth % 8 != 0)
8722 return std::nullopt;
8723 unsigned ByteWidth = BitWidth / 8;
8724 assert(Index < ByteWidth && "invalid index requested");
8725 (void) ByteWidth;
8726
8727 switch (Op.getOpcode()) {
8728 case ISD::OR: {
8729 auto LHS =
8730 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8731 if (!LHS)
8732 return std::nullopt;
8733 auto RHS =
8734 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8735 if (!RHS)
8736 return std::nullopt;
8737
8738 if (LHS->isConstantZero())
8739 return RHS;
8740 if (RHS->isConstantZero())
8741 return LHS;
8742 return std::nullopt;
8743 }
8744 case ISD::SHL: {
8745 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8746 if (!ShiftOp)
8747 return std::nullopt;
8748
8749 uint64_t BitShift = ShiftOp->getZExtValue();
8750
8751 if (BitShift % 8 != 0)
8752 return std::nullopt;
8753 uint64_t ByteShift = BitShift / 8;
8754
8755 // If we are shifting by an amount greater than the index we are trying to
8756 // provide, then do not provide anything. Otherwise, subtract the index by
8757 // the amount we shifted by.
8758 return Index < ByteShift
8759               ? SDByteProvider::getConstantZero()
8760 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8761 Depth + 1, VectorIndex, Index);
8762 }
8763 case ISD::ANY_EXTEND:
8764 case ISD::SIGN_EXTEND:
8765 case ISD::ZERO_EXTEND: {
8766 SDValue NarrowOp = Op->getOperand(0);
8767 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8768 if (NarrowBitWidth % 8 != 0)
8769 return std::nullopt;
8770 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8771
8772 if (Index >= NarrowByteWidth)
8773 return Op.getOpcode() == ISD::ZERO_EXTEND
8774 ? std::optional<SDByteProvider>(
8775                       SDByteProvider::getConstantZero())
8776 : std::nullopt;
8777 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8778 StartingIndex);
8779 }
8780 case ISD::BSWAP:
8781 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8782 Depth + 1, VectorIndex, StartingIndex);
8783  case ISD::EXTRACT_VECTOR_ELT: {
8784 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8785 if (!OffsetOp)
8786 return std::nullopt;
8787
8788 VectorIndex = OffsetOp->getZExtValue();
8789
8790 SDValue NarrowOp = Op->getOperand(0);
8791 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8792 if (NarrowBitWidth % 8 != 0)
8793 return std::nullopt;
8794 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8795 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8796 // type, leaving the high bits undefined.
8797 if (Index >= NarrowByteWidth)
8798 return std::nullopt;
8799
8800 // Check to see if the position of the element in the vector corresponds
8801 // with the byte we are trying to provide for. In the case of a vector of
8802 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8803 // the element will provide a range of bytes. For example, if we have a
8804 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8805 // 3).
8806 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8807 return std::nullopt;
8808 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8809 return std::nullopt;
8810
8811 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8812 VectorIndex, StartingIndex);
8813 }
8814 case ISD::LOAD: {
8815 auto L = cast<LoadSDNode>(Op.getNode());
8816 if (!L->isSimple() || L->isIndexed())
8817 return std::nullopt;
8818
8819 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8820 if (NarrowBitWidth % 8 != 0)
8821 return std::nullopt;
8822 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8823
8824    // If the width of the load does not reach the byte we are trying to
8825    // provide for and it is not a ZEXTLOAD, then the load does not provide
8826    // for the byte in question
8827 if (Index >= NarrowByteWidth)
8828 return L->getExtensionType() == ISD::ZEXTLOAD
8829 ? std::optional<SDByteProvider>(
8830                       SDByteProvider::getConstantZero())
8831 : std::nullopt;
8832
8833 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8834 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8835 }
8836 }
8837
8838 return std::nullopt;
8839}
8840
8841static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8842 return i;
8843}
8844
8845static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8846 return BW - i - 1;
8847}
8848
8849// Check if the byte offsets we are looking at match with either big or
8850// little endian value loaded. Return true for big endian, false for little
8851// endian, and std::nullopt if match failed.
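// E.g. (illustrative): with FirstOffset == 0, ByteOffsets {0, 1, 2, 3} matches
// little endian (returns false) and {3, 2, 1, 0} matches big endian (returns
// true).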
8852static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8853 int64_t FirstOffset) {
8854 // The endian can be decided only when it is 2 bytes at least.
8855 unsigned Width = ByteOffsets.size();
8856 if (Width < 2)
8857 return std::nullopt;
8858
8859 bool BigEndian = true, LittleEndian = true;
8860 for (unsigned i = 0; i < Width; i++) {
8861 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8862 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8863 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8864 if (!BigEndian && !LittleEndian)
8865 return std::nullopt;
8866 }
8867
8868  assert((BigEndian != LittleEndian) && "It should be either big endian or "
8869 "little endian");
8870 return BigEndian;
8871}
8872
8873// Look through one layer of truncate or extend.
8874static SDValue stripTruncAndExt(SDValue Value) {
8875 switch (Value.getOpcode()) {
8876 case ISD::TRUNCATE:
8877 case ISD::ZERO_EXTEND:
8878 case ISD::SIGN_EXTEND:
8879 case ISD::ANY_EXTEND:
8880 return Value.getOperand(0);
8881 }
8882 return SDValue();
8883}
8884
8885/// Match a pattern where a wide type scalar value is stored by several narrow
8886/// stores. Fold it into a single store or a BSWAP and a store if the target
8887/// supports it.
8888///
8889/// Assuming little endian target:
8890/// i8 *p = ...
8891/// i32 val = ...
8892/// p[0] = (val >> 0) & 0xFF;
8893/// p[1] = (val >> 8) & 0xFF;
8894/// p[2] = (val >> 16) & 0xFF;
8895/// p[3] = (val >> 24) & 0xFF;
8896/// =>
8897/// *((i32)p) = val;
8898///
8899/// i8 *p = ...
8900/// i32 val = ...
8901/// p[0] = (val >> 24) & 0xFF;
8902/// p[1] = (val >> 16) & 0xFF;
8903/// p[2] = (val >> 8) & 0xFF;
8904/// p[3] = (val >> 0) & 0xFF;
8905/// =>
8906/// *((i32)p) = BSWAP(val);
8907SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8908 // The matching looks for "store (trunc x)" patterns that appear early but are
8909 // likely to be replaced by truncating store nodes during combining.
8910 // TODO: If there is evidence that running this later would help, this
8911 // limitation could be removed. Legality checks may need to be added
8912 // for the created store and optional bswap/rotate.
8913 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8914 return SDValue();
8915
8916 // We only handle merging simple stores of 1-4 bytes.
8917 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8918 EVT MemVT = N->getMemoryVT();
8919 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8920 !N->isSimple() || N->isIndexed())
8921 return SDValue();
8922
8923  // Collect all of the stores in the chain, up to the maximum store width (i64).
8924 SDValue Chain = N->getChain();
8925  SmallVector<StoreSDNode *, 8> Stores = {N};
8926 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8927 unsigned MaxWideNumBits = 64;
8928 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8929 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8930 // All stores must be the same size to ensure that we are writing all of the
8931 // bytes in the wide value.
8932 // This store should have exactly one use as a chain operand for another
8933 // store in the merging set. If there are other chain uses, then the
8934 // transform may not be safe because order of loads/stores outside of this
8935 // set may not be preserved.
8936 // TODO: We could allow multiple sizes by tracking each stored byte.
8937 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8938 Store->isIndexed() || !Store->hasOneUse())
8939 return SDValue();
8940 Stores.push_back(Store);
8941 Chain = Store->getChain();
8942 if (MaxStores < Stores.size())
8943 return SDValue();
8944 }
8945 // There is no reason to continue if we do not have at least a pair of stores.
8946 if (Stores.size() < 2)
8947 return SDValue();
8948
8949 // Handle simple types only.
8950 LLVMContext &Context = *DAG.getContext();
8951 unsigned NumStores = Stores.size();
8952 unsigned WideNumBits = NumStores * NarrowNumBits;
8953 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8954 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8955 return SDValue();
8956
8957 // Check if all bytes of the source value that we are looking at are stored
8958 // to the same base address. Collect offsets from Base address into OffsetMap.
8959 SDValue SourceValue;
8960 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8961 int64_t FirstOffset = INT64_MAX;
8962 StoreSDNode *FirstStore = nullptr;
8963 std::optional<BaseIndexOffset> Base;
8964 for (auto *Store : Stores) {
8965 // All the stores store different parts of the CombinedValue. A truncate is
8966 // required to get the partial value.
8967 SDValue Trunc = Store->getValue();
8968 if (Trunc.getOpcode() != ISD::TRUNCATE)
8969 return SDValue();
8970 // Other than the first/last part, a shift operation is required to get the
8971 // offset.
8972 int64_t Offset = 0;
8973 SDValue WideVal = Trunc.getOperand(0);
8974 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8975 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8976 // The shift amount must be a constant multiple of the narrow type.
8977 // It is translated to the offset address in the wide source value "y".
8978 //
8979 // x = srl y, ShiftAmtC
8980 // i8 z = trunc x
8981 // store z, ...
8982 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8983 if (ShiftAmtC % NarrowNumBits != 0)
8984 return SDValue();
8985
8986 // Make sure we aren't reading bits that are shifted in.
8987 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
8988 return SDValue();
8989
8990 Offset = ShiftAmtC / NarrowNumBits;
8991 WideVal = WideVal.getOperand(0);
8992 }
8993
8994 // Stores must share the same source value with different offsets.
8995 if (!SourceValue)
8996 SourceValue = WideVal;
8997 else if (SourceValue != WideVal) {
8998 // Truncate and extends can be stripped to see if the values are related.
8999 if (stripTruncAndExt(SourceValue) != WideVal &&
9000 stripTruncAndExt(WideVal) != SourceValue)
9001 return SDValue();
9002
9003 if (WideVal.getScalarValueSizeInBits() >
9004 SourceValue.getScalarValueSizeInBits())
9005 SourceValue = WideVal;
9006
9007 // Give up if the source value type is smaller than the store size.
9008 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
9009 return SDValue();
9010 }
9011
9012 // Stores must share the same base address.
9013    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
9014 int64_t ByteOffsetFromBase = 0;
9015 if (!Base)
9016 Base = Ptr;
9017 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9018 return SDValue();
9019
9020 // Remember the first store.
9021 if (ByteOffsetFromBase < FirstOffset) {
9022 FirstStore = Store;
9023 FirstOffset = ByteOffsetFromBase;
9024 }
9025 // Map the offset in the store and the offset in the combined value, and
9026 // early return if it has been set before.
9027 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
9028 return SDValue();
9029 OffsetMap[Offset] = ByteOffsetFromBase;
9030 }
9031
9032 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9033 assert(FirstStore && "First store must be set");
9034
9035 // Check that a store of the wide type is both allowed and fast on the target
9036 const DataLayout &Layout = DAG.getDataLayout();
9037 unsigned Fast = 0;
9038 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9039 *FirstStore->getMemOperand(), &Fast);
9040 if (!Allowed || !Fast)
9041 return SDValue();
9042
9043 // Check if the pieces of the value are going to the expected places in memory
9044 // to merge the stores.
9045 auto checkOffsets = [&](bool MatchLittleEndian) {
9046 if (MatchLittleEndian) {
9047 for (unsigned i = 0; i != NumStores; ++i)
9048 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9049 return false;
9050 } else { // MatchBigEndian by reversing loop counter.
9051 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9052 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9053 return false;
9054 }
9055 return true;
9056 };
9057
9058 // Check if the offsets line up for the native data layout of this target.
9059 bool NeedBswap = false;
9060 bool NeedRotate = false;
9061 if (!checkOffsets(Layout.isLittleEndian())) {
9062 // Special-case: check if byte offsets line up for the opposite endian.
9063 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9064 NeedBswap = true;
9065 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9066 NeedRotate = true;
9067 else
9068 return SDValue();
9069 }
9070
9071 SDLoc DL(N);
9072 if (WideVT != SourceValue.getValueType()) {
9073 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9074 "Unexpected store value to merge");
9075 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9076 }
9077
9078 // Before legalize we can introduce illegal bswaps/rotates which will be later
9079 // converted to an explicit bswap sequence. This way we end up with a single
9080 // store and byte shuffling instead of several stores and byte shuffling.
9081 if (NeedBswap) {
9082 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9083 } else if (NeedRotate) {
9084 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9085 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9086 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9087 }
9088
9089 SDValue NewStore =
9090 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9091 FirstStore->getPointerInfo(), FirstStore->getAlign());
9092
9093 // Rely on other DAG combine rules to remove the other individual stores.
9094 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9095 return NewStore;
9096}
9097
9098/// Match a pattern where a wide type scalar value is loaded by several narrow
9099/// loads and combined by shifts and ors. Fold it into a single load or a load
9100/// and a BSWAP if the target supports it.
9101///
9102/// Assuming little endian target:
9103/// i8 *a = ...
9104/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9105/// =>
9106/// i32 val = *((i32)a)
9107///
9108/// i8 *a = ...
9109/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9110/// =>
9111/// i32 val = BSWAP(*((i32)a))
9112///
9113/// TODO: This rule matches complex patterns with OR node roots and doesn't
9114/// interact well with the worklist mechanism. When a part of the pattern is
9115/// updated (e.g. one of the loads) its direct users are put into the worklist,
9116/// but the root node of the pattern which triggers the load combine is not
9117/// necessarily a direct user of the changed node. For example, once the address
9118/// of the t28 load is reassociated, load combine won't be triggered:
9119/// t25: i32 = add t4, Constant:i32<2>
9120/// t26: i64 = sign_extend t25
9121/// t27: i64 = add t2, t26
9122/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9123/// t29: i32 = zero_extend t28
9124/// t32: i32 = shl t29, Constant:i8<8>
9125/// t33: i32 = or t23, t32
9126/// As a possible fix visitLoad can check if the load can be a part of a load
9127/// combine pattern and add corresponding OR roots to the worklist.
9128SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9129 assert(N->getOpcode() == ISD::OR &&
9130 "Can only match load combining against OR nodes");
9131
9132 // Handles simple types only
9133 EVT VT = N->getValueType(0);
9134 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9135 return SDValue();
9136 unsigned ByteWidth = VT.getSizeInBits() / 8;
9137
9138 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9139 auto MemoryByteOffset = [&](SDByteProvider P) {
9140 assert(P.hasSrc() && "Must be a memory byte provider");
9141 auto *Load = cast<LoadSDNode>(P.Src.value());
9142
9143 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9144
9145 assert(LoadBitWidth % 8 == 0 &&
9146 "can only analyze providers for individual bytes not bit");
9147 unsigned LoadByteWidth = LoadBitWidth / 8;
9148 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9149 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9150 };
9151
9152 std::optional<BaseIndexOffset> Base;
9153 SDValue Chain;
9154
9155  SmallPtrSet<LoadSDNode *, 8> Loads;
9156 std::optional<SDByteProvider> FirstByteProvider;
9157 int64_t FirstOffset = INT64_MAX;
9158
9159 // Check if all the bytes of the OR we are looking at are loaded from the same
9160 // base address. Collect bytes offsets from Base address in ByteOffsets.
9161 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9162 unsigned ZeroExtendedBytes = 0;
9163 for (int i = ByteWidth - 1; i >= 0; --i) {
9164 auto P =
9165 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9166 /*StartingIndex*/ i);
9167 if (!P)
9168 return SDValue();
9169
9170 if (P->isConstantZero()) {
9171 // It's OK for the N most significant bytes to be 0, we can just
9172 // zero-extend the load.
9173 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9174 return SDValue();
9175 continue;
9176 }
9177 assert(P->hasSrc() && "provenance should either be memory or zero");
9178 auto *L = cast<LoadSDNode>(P->Src.value());
9179
9180 // All loads must share the same chain
9181 SDValue LChain = L->getChain();
9182 if (!Chain)
9183 Chain = LChain;
9184 else if (Chain != LChain)
9185 return SDValue();
9186
9187 // Loads must share the same base address
9188    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9189 int64_t ByteOffsetFromBase = 0;
9190
9191 // For vector loads, the expected load combine pattern will have an
9192 // ExtractElement for each index in the vector. While each of these
9193 // ExtractElements will be accessing the same base address as determined
9194 // by the load instruction, the actual bytes they interact with will differ
9195 // due to different ExtractElement indices. To accurately determine the
9196 // byte position of an ExtractElement, we offset the base load ptr with
9197 // the index multiplied by the byte size of each element in the vector.
9198 if (L->getMemoryVT().isVector()) {
9199 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9200 if (LoadWidthInBit % 8 != 0)
9201 return SDValue();
9202 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9203 Ptr.addToOffset(ByteOffsetFromVector);
9204 }
9205
9206 if (!Base)
9207 Base = Ptr;
9208
9209 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9210 return SDValue();
9211
9212 // Calculate the offset of the current byte from the base address
9213 ByteOffsetFromBase += MemoryByteOffset(*P);
9214 ByteOffsets[i] = ByteOffsetFromBase;
9215
9216 // Remember the first byte load
9217 if (ByteOffsetFromBase < FirstOffset) {
9218 FirstByteProvider = P;
9219 FirstOffset = ByteOffsetFromBase;
9220 }
9221
9222 Loads.insert(L);
9223 }
9224
9225 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9226 "memory, so there must be at least one load which produces the value");
9227 assert(Base && "Base address of the accessed memory location must be set");
9228 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9229
9230 bool NeedsZext = ZeroExtendedBytes > 0;
9231
9232 EVT MemVT =
9233 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9234
9235 if (!MemVT.isSimple())
9236 return SDValue();
9237
9238 // Before legalize we can introduce too wide illegal loads which will be later
9239 // split into legal sized loads. This enables us to combine i64 load by i8
9240 // patterns to a couple of i32 loads on 32 bit targets.
9241 if (LegalOperations &&
9242      !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9243 MemVT))
9244 return SDValue();
9245
9246 // Check if the bytes of the OR we are looking at match with either big or
9247 // little endian value load
9248 std::optional<bool> IsBigEndian = isBigEndian(
9249 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9250 if (!IsBigEndian)
9251 return SDValue();
9252
9253 assert(FirstByteProvider && "must be set");
9254
9255 // Ensure that the first byte is loaded from zero offset of the first load.
9256 // So the combined value can be loaded from the first load address.
9257 if (MemoryByteOffset(*FirstByteProvider) != 0)
9258 return SDValue();
9259 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9260
9261 // The node we are looking at matches with the pattern, check if we can
9262 // replace it with a single (possibly zero-extended) load and bswap + shift if
9263 // needed.
9264
9265 // If the load needs byte swap check if the target supports it
9266 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9267
9268 // Before legalize we can introduce illegal bswaps which will be later
9269 // converted to an explicit bswap sequence. This way we end up with a single
9270 // load and byte shuffling instead of several loads and byte shuffling.
9271 // We do not introduce illegal bswaps when zero-extending as this tends to
9272 // introduce too many arithmetic instructions.
9273 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9274 !TLI.isOperationLegal(ISD::BSWAP, VT))
9275 return SDValue();
9276
9277 // If we need to bswap and zero extend, we have to insert a shift. Check that
9278 // it is legal.
9279 if (NeedsBswap && NeedsZext && LegalOperations &&
9280 !TLI.isOperationLegal(ISD::SHL, VT))
9281 return SDValue();
9282
9283 // Check that a load of the wide type is both allowed and fast on the target
9284 unsigned Fast = 0;
9285 bool Allowed =
9286 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9287 *FirstLoad->getMemOperand(), &Fast);
9288 if (!Allowed || !Fast)
9289 return SDValue();
9290
9291 SDValue NewLoad =
9292 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9293 Chain, FirstLoad->getBasePtr(),
9294 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9295
9296 // Transfer chain users from old loads to the new load.
9297 for (LoadSDNode *L : Loads)
9298 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9299
9300 if (!NeedsBswap)
9301 return NewLoad;
9302
9303 SDValue ShiftedLoad =
9304 NeedsZext
9305 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9306 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9307 SDLoc(N), LegalOperations))
9308 : NewLoad;
9309 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9310}
9311
9312// If the target has andn, bsl, or a similar bit-select instruction,
9313// we want to unfold masked merge, with canonical pattern of:
9314// | A | |B|
9315// ((x ^ y) & m) ^ y
9316// | D |
9317// Into:
9318// (x & m) | (y & ~m)
9319// If y is a constant, m is not a 'not', and the 'andn' does not work with
9320// immediates, we unfold into a different pattern:
9321// ~(~x & m) & (m | y)
9322// If x is a constant, m is a 'not', and the 'andn' does not work with
9323// immediates, we unfold into a different pattern:
9324// (x | ~m) & ~(~m & ~y)
9325// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9326// the very least that breaks andnpd / andnps patterns, and because those
9327// patterns are simplified in IR and shouldn't be created in the DAG
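// Illustrative example: x = 0b1100, y = 0b1010, m = 0b0011 gives
// ((x ^ y) & m) ^ y == 0b1000 == (x & m) | (y & ~m).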
9328SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9329 assert(N->getOpcode() == ISD::XOR);
9330
9331 // Don't touch 'not' (i.e. where y = -1).
9332 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9333 return SDValue();
9334
9335 EVT VT = N->getValueType(0);
9336
9337 // There are 3 commutable operators in the pattern,
9338 // so we have to deal with 8 possible variants of the basic pattern.
9339 SDValue X, Y, M;
9340 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9341 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9342 return false;
9343 SDValue Xor = And.getOperand(XorIdx);
9344 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9345 return false;
9346 SDValue Xor0 = Xor.getOperand(0);
9347 SDValue Xor1 = Xor.getOperand(1);
9348 // Don't touch 'not' (i.e. where y = -1).
9349 if (isAllOnesOrAllOnesSplat(Xor1))
9350 return false;
9351 if (Other == Xor0)
9352 std::swap(Xor0, Xor1);
9353 if (Other != Xor1)
9354 return false;
9355 X = Xor0;
9356 Y = Xor1;
9357 M = And.getOperand(XorIdx ? 0 : 1);
9358 return true;
9359 };
9360
9361 SDValue N0 = N->getOperand(0);
9362 SDValue N1 = N->getOperand(1);
9363 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9364 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9365 return SDValue();
9366
9367 // Don't do anything if the mask is constant. This should not be reachable.
9368 // InstCombine should have already unfolded this pattern, and DAGCombiner
9369 // probably shouldn't produce it either.
9370 if (isa<ConstantSDNode>(M.getNode()))
9371 return SDValue();
9372
9373 // We can transform if the target has AndNot
9374 if (!TLI.hasAndNot(M))
9375 return SDValue();
9376
9377 SDLoc DL(N);
9378
9379 // If Y is a constant, check that 'andn' works with immediates, unless M is
9380 // a bitwise not that would already allow ANDN to be used.
9381 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9382 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9383 // If not, we need to do a bit more work to make sure andn is still used.
9384 SDValue NotX = DAG.getNOT(DL, X, VT);
9385 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9386 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9387 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9388 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9389 }
9390
9391 // If X is a constant and M is a bitwise not, check that 'andn' works with
9392 // immediates.
9393 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9394 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9395 // If not, we need to do a bit more work to make sure andn is still used.
9396 SDValue NotM = M.getOperand(0);
9397 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9398 SDValue NotY = DAG.getNOT(DL, Y, VT);
9399 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9400 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9401 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9402 }
9403
9404 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9405 SDValue NotM = DAG.getNOT(DL, M, VT);
9406 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9407
9408 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9409}
9410
9411SDValue DAGCombiner::visitXOR(SDNode *N) {
9412 SDValue N0 = N->getOperand(0);
9413 SDValue N1 = N->getOperand(1);
9414 EVT VT = N0.getValueType();
9415 SDLoc DL(N);
9416
9417 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9418 if (N0.isUndef() && N1.isUndef())
9419 return DAG.getConstant(0, DL, VT);
9420
9421 // fold (xor x, undef) -> undef
9422 if (N0.isUndef())
9423 return N0;
9424 if (N1.isUndef())
9425 return N1;
9426
9427 // fold (xor c1, c2) -> c1^c2
9428 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9429 return C;
9430
9431 // canonicalize constant to RHS
9432 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9433 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9434 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9435
9436 // fold vector ops
9437 if (VT.isVector()) {
9438 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9439 return FoldedVOp;
9440
9441 // fold (xor x, 0) -> x, vector edition
9442 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9443 return N0;
9444 }
9445
9446 // fold (xor x, 0) -> x
9447 if (isNullConstant(N1))
9448 return N0;
9449
9450 if (SDValue NewSel = foldBinOpIntoSelect(N))
9451 return NewSel;
9452
9453 // reassociate xor
9454 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9455 return RXOR;
9456
9457 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9458 if (SDValue SD =
9459 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9460 return SD;
9461
9462 // fold (a^b) -> (a|b) iff a and b share no bits.
9463 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9464 DAG.haveNoCommonBitsSet(N0, N1)) {
9465 SDNodeFlags Flags;
9466 Flags.setDisjoint(true);
9467 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9468 }
9469
9470 // look for 'add-like' folds:
9471 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
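// For example, for i8: (xor x, 0x80) and (add x, 0x80) both simply flip the
// sign bit, since the addend's low bits are zero and the carry out of the top
// bit is discarded.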
9472 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9473 isMinSignedConstant(N1))
9474 if (SDValue Combined = visitADDLike(N))
9475 return Combined;
9476
9477 // fold !(x cc y) -> (x !cc y)
9478 unsigned N0Opcode = N0.getOpcode();
9479 SDValue LHS, RHS, CC;
9480 if (TLI.isConstTrueVal(N1) &&
9481 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9482 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9483 LHS.getValueType());
9484 if (!LegalOperations ||
9485 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9486 switch (N0Opcode) {
9487 default:
9488 llvm_unreachable("Unhandled SetCC Equivalent!");
9489 case ISD::SETCC:
9490 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9491 case ISD::SELECT_CC:
9492 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9493 N0.getOperand(3), NotCC);
9494 case ISD::STRICT_FSETCC:
9495 case ISD::STRICT_FSETCCS: {
9496 if (N0.hasOneUse()) {
9497 // FIXME Can we handle multiple uses? Could we token factor the chain
9498 // results from the new/old setcc?
9499 SDValue SetCC =
9500 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9501 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9502 CombineTo(N, SetCC);
9503 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9504 recursivelyDeleteUnusedNodes(N0.getNode());
9505 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9506 }
9507 break;
9508 }
9509 }
9510 }
9511 }
9512
9513 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9514 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9515 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9516 SDValue V = N0.getOperand(0);
9517 SDLoc DL0(N0);
9518 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9519 DAG.getConstant(1, DL0, V.getValueType()));
9520 AddToWorklist(V.getNode());
9521 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9522 }
9523
9524 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9525 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9526 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9527 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9528 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9529 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9530 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9531 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9532 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9533 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9534 }
9535 }
9536 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9537 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9538 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9539 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9540 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9541 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9542 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9543 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9544 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9545 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9546 }
9547 }
9548
9549 // fold (not (neg x)) -> (add X, -1)
9550 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9551 // Y is a constant or the subtract has a single use.
9552 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9553 isNullConstant(N0.getOperand(0))) {
9554 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9555 DAG.getAllOnesConstant(DL, VT));
9556 }
9557
9558 // fold (not (add X, -1)) -> (neg X)
9559 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9560 isAllOnesOrAllOnesSplat(N0.getOperand(1), /*AllowUndefs=*/true)) {
9561 return DAG.getNegative(N0.getOperand(0), DL, VT);
9562 }
9563
9564 // fold (xor (and x, y), y) -> (and (not x), y)
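// Note: (x & y) ^ y is 0 wherever y is 0 and ~x wherever y is 1, i.e. ((not x) & y).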
9565 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9566 SDValue X = N0.getOperand(0);
9567 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9568 AddToWorklist(NotX.getNode());
9569 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9570 }
9571
9572 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
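// Here Y is 0 when X >= 0 and -1 when X < 0, so (X + Y) ^ Y is X for X >= 0
// and ~(X - 1) == -X for X < 0, i.e. abs(X).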
9573 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9574 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9575 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9576 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9577 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9578 SDValue S0 = S.getOperand(0);
9579 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9580 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9581 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9582 return DAG.getNode(ISD::ABS, DL, VT, S0);
9583 }
9584 }
9585
9586 // fold (xor x, x) -> 0
9587 if (N0 == N1)
9588 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9589
9590 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9591 // Here is a concrete example of this equivalence:
9592 // i16 x == 14
9593 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9594 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9595 //
9596 // =>
9597 //
9598 // i16 ~1 == 0b1111111111111110
9599 // i16 rol(~1, 14) == 0b1011111111111111
9600 //
9601 // Some additional tips to help conceptualize this transform:
9602 // - Try to see the operation as placing a single zero in a value of all ones.
9603 // - There exists no value for x which would allow the result to contain zero.
9604 // - Values of x larger than the bitwidth are undefined and do not require a
9605 // consistent result.
9606 // - Pushing the zero left requires shifting one bits in from the right.
9607 // A rotate left of ~1 is a nice way of achieving the desired result.
9608 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9609 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9610 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9611 N0.getOperand(1));
9612 }
9613
9614 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9615 if (N0Opcode == N1.getOpcode())
9616 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9617 return V;
9618
9619 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9620 return R;
9621 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9622 return R;
9623 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9624 return R;
9625
9626 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9627 if (SDValue MM = unfoldMaskedMerge(N))
9628 return MM;
9629
9630 // Simplify the expression using non-local knowledge.
9631 if (SimplifyDemandedBits(SDValue(N, 0)))
9632 return SDValue(N, 0);
9633
9634 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9635 return Combined;
9636
9637 return SDValue();
9638}
9639
9640/// If we have a shift-by-constant of a bitwise logic op that itself has a
9641/// shift-by-constant operand with identical opcode, we may be able to convert
9642/// that into 2 independent shifts followed by the logic op. This is a
9643/// throughput improvement.
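/// For example (illustrative): (shl (xor (shl X, 3), Y), 2)
///   --> (xor (shl X, 5), (shl Y, 2))
/// since shl distributes over xor and the two shift amounts add.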
9644 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9645 // Match a one-use bitwise logic op.
9646 SDValue LogicOp = Shift->getOperand(0);
9647 if (!LogicOp.hasOneUse())
9648 return SDValue();
9649
9650 unsigned LogicOpcode = LogicOp.getOpcode();
9651 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9652 LogicOpcode != ISD::XOR)
9653 return SDValue();
9654
9655 // Find a matching one-use shift by constant.
9656 unsigned ShiftOpcode = Shift->getOpcode();
9657 SDValue C1 = Shift->getOperand(1);
9658 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9659 assert(C1Node && "Expected a shift with constant operand");
9660 const APInt &C1Val = C1Node->getAPIntValue();
9661 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9662 const APInt *&ShiftAmtVal) {
9663 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9664 return false;
9665
9666 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9667 if (!ShiftCNode)
9668 return false;
9669
9670 // Capture the shifted operand and shift amount value.
9671 ShiftOp = V.getOperand(0);
9672 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9673
9674 // Shift amount types do not have to match their operand type, so check that
9675 // the constants are the same width.
9676 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9677 return false;
9678
9679 // The fold is not valid if the sum of the shift values doesn't fit in the
9680 // given shift amount type.
9681 bool Overflow = false;
9682 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9683 if (Overflow)
9684 return false;
9685
9686 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9687 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9688 return false;
9689
9690 return true;
9691 };
9692
9693 // Logic ops are commutative, so check each operand for a match.
9694 SDValue X, Y;
9695 const APInt *C0Val;
9696 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9697 Y = LogicOp.getOperand(1);
9698 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9699 Y = LogicOp.getOperand(0);
9700 else
9701 return SDValue();
9702
9703 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9704 SDLoc DL(Shift);
9705 EVT VT = Shift->getValueType(0);
9706 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9707 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9708 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9709 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9710 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9711 LogicOp->getFlags());
9712}
9713
9714/// Handle transforms common to the three shifts, when the shift amount is a
9715/// constant.
9716/// We are looking for: (shift being one of shl/sra/srl)
9717/// shift (binop X, C0), C1
9718/// And want to transform into:
9719/// binop (shift X, C1), (shift C0, C1)
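/// For example: (shl (and X, 0xF0), 4) --> (and (shl X, 4), 0xF00).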
9720SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9721 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9722
9723 // Do not turn a 'not' into a regular xor.
9724 if (isBitwiseNot(N->getOperand(0)))
9725 return SDValue();
9726
9727 // The inner binop must be one-use, since we want to replace it.
9728 SDValue LHS = N->getOperand(0);
9729 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9730 return SDValue();
9731
9732 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9733 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9734 return R;
9735
9736 // We want to pull some binops through shifts, so that we have (and (shift))
9737 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9738 // thing happens with address calculations, so it's important to canonicalize
9739 // it.
9740 switch (LHS.getOpcode()) {
9741 default:
9742 return SDValue();
9743 case ISD::OR:
9744 case ISD::XOR:
9745 case ISD::AND:
9746 break;
9747 case ISD::ADD:
9748 if (N->getOpcode() != ISD::SHL)
9749 return SDValue(); // only shl(add) not sr[al](add).
9750 break;
9751 }
9752
9753 // FIXME: disable this unless the input to the binop is a shift by a constant
9754 // or is copy/select. Enable this in other cases once we figure out when it's
9755 // exactly profitable.
9756 SDValue BinOpLHSVal = LHS.getOperand(0);
9757 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9758 BinOpLHSVal.getOpcode() == ISD::SRA ||
9759 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9760 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9761 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9762 BinOpLHSVal.getOpcode() == ISD::SELECT;
9763
9764 if (!IsShiftByConstant && !IsCopyOrSelect)
9765 return SDValue();
9766
9767 if (IsCopyOrSelect && N->hasOneUse())
9768 return SDValue();
9769
9770 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9771 SDLoc DL(N);
9772 EVT VT = N->getValueType(0);
9773 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9774 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9775 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9776 N->getOperand(1));
9777 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9778 }
9779
9780 return SDValue();
9781}
9782
9783SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9784 assert(N->getOpcode() == ISD::TRUNCATE);
9785 assert(N->getOperand(0).getOpcode() == ISD::AND);
9786
9787 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
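// For example: (truncate:i8 (and X:i32, 0xF0)) -> (and (truncate:i8 X), 0xF0).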
9788 EVT TruncVT = N->getValueType(0);
9789 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9790 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9791 SDValue N01 = N->getOperand(0).getOperand(1);
9792 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9793 SDLoc DL(N);
9794 SDValue N00 = N->getOperand(0).getOperand(0);
9795 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9796 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9797 AddToWorklist(Trunc00.getNode());
9798 AddToWorklist(Trunc01.getNode());
9799 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9800 }
9801 }
9802
9803 return SDValue();
9804}
9805
9806SDValue DAGCombiner::visitRotate(SDNode *N) {
9807 SDLoc dl(N);
9808 SDValue N0 = N->getOperand(0);
9809 SDValue N1 = N->getOperand(1);
9810 EVT VT = N->getValueType(0);
9811 unsigned Bitsize = VT.getScalarSizeInBits();
9812
9813 // fold (rot x, 0) -> x
9814 if (isNullOrNullSplat(N1))
9815 return N0;
9816
9817 // fold (rot x, c) -> x iff (c % BitSize) == 0
9818 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9819 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9820 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9821 return N0;
9822 }
9823
9824 // fold (rot x, c) -> (rot x, c % BitSize)
9825 bool OutOfRange = false;
9826 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9827 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9828 return true;
9829 };
9830 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9831 EVT AmtVT = N1.getValueType();
9832 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9833 if (SDValue Amt =
9834 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9835 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9836 }
9837
9838 // rot i16 X, 8 --> bswap X
9839 auto *RotAmtC = isConstOrConstSplat(N1);
9840 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9841 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9842 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9843
9844 // Simplify the operands using demanded-bits information.
9845 if (SimplifyDemandedBits(SDValue(N, 0)))
9846 return SDValue(N, 0);
9847
9848 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9849 if (N1.getOpcode() == ISD::TRUNCATE &&
9850 N1.getOperand(0).getOpcode() == ISD::AND) {
9851 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9852 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9853 }
9854
9855 unsigned NextOp = N0.getOpcode();
9856
9857 // fold (rot* (rot* x, c2), c1)
9858 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
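// For example, on i8: (rotl (rotr x, 3), 7) -> (rotl x, (7 - 3 + 8) % 8) = (rotl x, 4).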
9859 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9860 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9861 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9862 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9863 EVT ShiftVT = C1->getValueType(0);
9864 bool SameSide = (N->getOpcode() == NextOp);
9865 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9866 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9867 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9868 {N1, BitsizeC});
9869 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9870 {N0.getOperand(1), BitsizeC});
9871 if (Norm1 && Norm2)
9872 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9873 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9874 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9875 {CombinedShift, BitsizeC});
9876 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9877 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9878 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9879 CombinedShiftNorm);
9880 }
9881 }
9882 }
9883 return SDValue();
9884}
9885
9886SDValue DAGCombiner::visitSHL(SDNode *N) {
9887 SDValue N0 = N->getOperand(0);
9888 SDValue N1 = N->getOperand(1);
9889 if (SDValue V = DAG.simplifyShift(N0, N1))
9890 return V;
9891
9892 SDLoc DL(N);
9893 EVT VT = N0.getValueType();
9894 EVT ShiftVT = N1.getValueType();
9895 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9896
9897 // fold (shl c1, c2) -> c1<<c2
9898 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9899 return C;
9900
9901 // fold vector ops
9902 if (VT.isVector()) {
9903 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9904 return FoldedVOp;
9905
9906 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9907 // If setcc produces all-one true value then:
9908 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9909 if (N1CV && N1CV->isConstant()) {
9910 if (N0.getOpcode() == ISD::AND) {
9911 SDValue N00 = N0->getOperand(0);
9912 SDValue N01 = N0->getOperand(1);
9913 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9914
9915 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9918 if (SDValue C =
9919 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9920 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9921 }
9922 }
9923 }
9924 }
9925
9926 if (SDValue NewSel = foldBinOpIntoSelect(N))
9927 return NewSel;
9928
9929 // if (shl x, c) is known to be zero, return 0
9930 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9931 return DAG.getConstant(0, DL, VT);
9932
9933 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9934 if (N1.getOpcode() == ISD::TRUNCATE &&
9935 N1.getOperand(0).getOpcode() == ISD::AND) {
9936 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9937 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9938 }
9939
9940 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9941 if (N0.getOpcode() == ISD::SHL) {
9942 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9943 ConstantSDNode *RHS) {
9944 APInt c1 = LHS->getAPIntValue();
9945 APInt c2 = RHS->getAPIntValue();
9946 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9947 return (c1 + c2).uge(OpSizeInBits);
9948 };
9949 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9950 return DAG.getConstant(0, DL, VT);
9951
9952 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9953 ConstantSDNode *RHS) {
9954 APInt c1 = LHS->getAPIntValue();
9955 APInt c2 = RHS->getAPIntValue();
9956 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9957 return (c1 + c2).ult(OpSizeInBits);
9958 };
9959 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9960 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9961 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9962 }
9963 }
9964
9965 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9966 // For this to be valid, the second form must not preserve any of the bits
9967 // that are shifted out by the inner shift in the first form. This means
9968 // the outer shift size must be >= the number of bits added by the ext.
9969 // As a corollary, we don't care what kind of ext it is.
9970 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9971 N0.getOpcode() == ISD::ANY_EXTEND ||
9972 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9973 N0.getOperand(0).getOpcode() == ISD::SHL) {
9974 SDValue N0Op0 = N0.getOperand(0);
9975 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9976 EVT InnerVT = N0Op0.getValueType();
9977 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9978
9979 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9980 ConstantSDNode *RHS) {
9981 APInt c1 = LHS->getAPIntValue();
9982 APInt c2 = RHS->getAPIntValue();
9983 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9984 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9985 (c1 + c2).uge(OpSizeInBits);
9986 };
9987 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9988 /*AllowUndefs*/ false,
9989 /*AllowTypeMismatch*/ true))
9990 return DAG.getConstant(0, DL, VT);
9991
9992 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9993 ConstantSDNode *RHS) {
9994 APInt c1 = LHS->getAPIntValue();
9995 APInt c2 = RHS->getAPIntValue();
9996 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9997 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9998 (c1 + c2).ult(OpSizeInBits);
9999 };
10000 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
10001 /*AllowUndefs*/ false,
10002 /*AllowTypeMismatch*/ true)) {
10003 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
10004 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
10005 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
10006 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
10007 }
10008 }
10009
10010 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
10011 // Only fold this if the inner zext has no other uses to avoid increasing
10012 // the total number of instructions.
10013 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
10014 N0.getOperand(0).getOpcode() == ISD::SRL) {
10015 SDValue N0Op0 = N0.getOperand(0);
10016 SDValue InnerShiftAmt = N0Op0.getOperand(1);
10017
10018 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10019 APInt c1 = LHS->getAPIntValue();
10020 APInt c2 = RHS->getAPIntValue();
10021 zeroExtendToMatch(c1, c2);
10022 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
10023 };
10024 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
10025 /*AllowUndefs*/ false,
10026 /*AllowTypeMismatch*/ true)) {
10027 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
10028 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
10029 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
10030 AddToWorklist(NewSHL.getNode());
10031 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
10032 }
10033 }
10034
10035 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
10036 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10037 ConstantSDNode *RHS) {
10038 const APInt &LHSC = LHS->getAPIntValue();
10039 const APInt &RHSC = RHS->getAPIntValue();
10040 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10041 LHSC.getZExtValue() <= RHSC.getZExtValue();
10042 };
10043
10044 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10045 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10046 if (N0->getFlags().hasExact()) {
10047 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10048 /*AllowUndefs*/ false,
10049 /*AllowTypeMismatch*/ true)) {
10050 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10051 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10052 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10053 }
10054 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10055 /*AllowUndefs*/ false,
10056 /*AllowTypeMismatch*/ true)) {
10057 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10058 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10059 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10060 }
10061 }
10062
10063 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10064 // (and (srl x, (sub c1, c2)), MASK)
10065 // Only fold this if the inner shift has no other uses -- if it does,
10066 // folding this will increase the total number of instructions.
10067 if (N0.getOpcode() == ISD::SRL &&
10068 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10069 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10070 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10071 /*AllowUndefs*/ false,
10072 /*AllowTypeMismatch*/ true)) {
10073 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10074 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10075 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10076 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10077 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10078 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10079 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10080 }
10081 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10082 /*AllowUndefs*/ false,
10083 /*AllowTypeMismatch*/ true)) {
10084 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10085 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10086 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10087 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10088 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10089 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10090 }
10091 }
10092 }
10093
10094 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
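// Shifting right arithmetically by c1 and then left by c1 clears the low c1
// bits and leaves the remaining bits of x unchanged, which is exactly the
// effect of masking with (shl -1, c1).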
10095 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10096 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10097 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10098 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10099 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10100 }
10101
10102 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10103 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10104 // Variant of version done on multiply, except mul by a power of 2 is turned
10105 // into a shift.
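// For example: (shl (add x, 16), 2) --> (add (shl x, 2), 64), since shl
// distributes over add modulo 2^bitwidth.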
10106 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10107 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10108 SDValue N01 = N0.getOperand(1);
10109 if (SDValue Shl1 =
10110 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10111 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10112 AddToWorklist(Shl0.getNode());
10113 SDNodeFlags Flags;
10114 // Preserve the disjoint flag for Or.
10115 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10116 Flags.setDisjoint(true);
10117 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10118 }
10119 }
10120
10121 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10122 // TODO: Add zext/add_nuw variant with suitable test coverage
10123 // TODO: Should we limit this with isLegalAddImmediate?
10124 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10125 N0.getOperand(0).getOpcode() == ISD::ADD &&
10126 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10127 N0.getOperand(0)->hasOneUse() &&
10128 TLI.isDesirableToCommuteWithShift(N, Level)) {
10129 SDValue Add = N0.getOperand(0);
10130 SDLoc DL(N0);
10131 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10132 {Add.getOperand(1)})) {
10133 if (SDValue ShlC =
10134 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10135 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10136 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10137 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10138 }
10139 }
10140 }
10141
10142 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10143 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10144 SDValue N01 = N0.getOperand(1);
10145 if (SDValue Shl =
10146 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10147 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10148 }
10149
10150 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10151 if (N1C && !N1C->isOpaque())
10152 if (SDValue NewSHL = visitShiftByConstant(N))
10153 return NewSHL;
10154
10155 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10156 // target.
10157 if (((N1.getOpcode() == ISD::CTTZ &&
10158 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10159 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10160 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10161 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10162 SDValue Y = N1.getOperand(0);
10163 SDLoc DL(N);
10164 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10165 SDValue And =
10166 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10167 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10168 }
10169
10170 if (SimplifyDemandedBits(SDValue(N, 0)))
10171 return SDValue(N, 0);
10172
10173 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10174 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10175 const APInt &C0 = N0.getConstantOperandAPInt(0);
10176 const APInt &C1 = N1C->getAPIntValue();
10177 return DAG.getVScale(DL, VT, C0 << C1);
10178 }
10179
10180 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10181 APInt ShlVal;
10182 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10183 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10184 const APInt &C0 = N0.getConstantOperandAPInt(0);
10185 if (ShlVal.ult(C0.getBitWidth())) {
10186 APInt NewStep = C0 << ShlVal;
10187 return DAG.getStepVector(DL, VT, NewStep);
10188 }
10189 }
10190
10191 return SDValue();
10192}
10193
10194// Transform a right shift of a multiply into a multiply-high.
10195// Examples:
10196 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10197 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10198 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10199 const TargetLowering &TLI) {
10200 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10201 "SRL or SRA node is required here!");
10202
10203 // Check the shift amount. Proceed with the transformation if the shift
10204 // amount is constant.
10205 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10206 if (!ShiftAmtSrc)
10207 return SDValue();
10208
10209 // The operation feeding into the shift must be a multiply.
10210 SDValue ShiftOperand = N->getOperand(0);
10211 if (ShiftOperand.getOpcode() != ISD::MUL)
10212 return SDValue();
10213
10214 // Both operands must be equivalent extend nodes.
10215 SDValue LeftOp = ShiftOperand.getOperand(0);
10216 SDValue RightOp = ShiftOperand.getOperand(1);
10217
10218 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10219 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10220
10221 if (!IsSignExt && !IsZeroExt)
10222 return SDValue();
10223
10224 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10225 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10226
10227 // return true if U may use the lower bits of its operands
10228 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10229 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10230 return true;
10231 }
10232 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10233 if (!UShiftAmtSrc) {
10234 return true;
10235 }
10236 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10237 return UShiftAmt < NarrowVTSize;
10238 };
10239
10240 // If the lower part of the MUL is also used and MUL_LOHI is supported,
10241 // do not introduce the MULH in favor of MUL_LOHI
10242 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10243 if (!ShiftOperand.hasOneUse() &&
10244 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10245 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10246 return SDValue();
10247 }
10248
10249 SDValue MulhRightOp;
10250 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10251 unsigned ActiveBits = IsSignExt
10252 ? Constant->getAPIntValue().getSignificantBits()
10253 : Constant->getAPIntValue().getActiveBits();
10254 if (ActiveBits > NarrowVTSize)
10255 return SDValue();
10256 MulhRightOp = DAG.getConstant(
10257 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10258 NarrowVT);
10259 } else {
10260 if (LeftOp.getOpcode() != RightOp.getOpcode())
10261 return SDValue();
10262 // Check that the two extend nodes are the same type.
10263 if (NarrowVT != RightOp.getOperand(0).getValueType())
10264 return SDValue();
10265 MulhRightOp = RightOp.getOperand(0);
10266 }
10267
10268 EVT WideVT = LeftOp.getValueType();
10269 // Proceed with the transformation if the wide types match.
10270 assert((WideVT == RightOp.getValueType()) &&
10271 "Cannot have a multiply node with two different operand types.");
10272
10273 // Proceed with the transformation if the wide type is twice as large
10274 // as the narrow type.
10275 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10276 return SDValue();
10277
10278 // Check the shift amount with the narrow type size.
10279 // Proceed with the transformation if the shift amount is the width
10280 // of the narrow type.
10281 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10282 if (ShiftAmt != NarrowVTSize)
10283 return SDValue();
10284
10285 // If the operation feeding into the MUL is a sign extend (sext),
10286 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10287 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10288
10289 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10290 // or, if it is a vector type, if we can transform to an acceptable type and
10291 // rely on legalization to split/combine the result.
10292 if (NarrowVT.isVector()) {
10293 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10294 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10295 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10296 return SDValue();
10297 } else {
10298 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10299 return SDValue();
10300 }
10301
10302 SDValue Result =
10303 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10304 bool IsSigned = N->getOpcode() == ISD::SRA;
10305 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10306}
10307
10308// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10309 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
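// For example: (bswap (xor (bswap a), b)) -> (xor a, (bswap b)), since
// bswap/bitreverse distribute over bitwise logic ops and are self-inverse.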
10310 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10311 unsigned Opcode = N->getOpcode();
10312 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10313 return SDValue();
10314
10315 SDValue N0 = N->getOperand(0);
10316 EVT VT = N->getValueType(0);
10317 SDLoc DL(N);
10318 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10319 SDValue OldLHS = N0.getOperand(0);
10320 SDValue OldRHS = N0.getOperand(1);
10321
10322 // If both operands are bswap/bitreverse, ignore the multiuse.
10323 // Otherwise we need to ensure logic_op and bswap/bitreverse(x) have one use.
10324 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10325 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10326 OldRHS.getOperand(0));
10327 }
10328
10329 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10330 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10331 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10332 NewBitReorder);
10333 }
10334
10335 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10336 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10337 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10338 OldRHS.getOperand(0));
10339 }
10340 }
10341 return SDValue();
10342}
10343
10344SDValue DAGCombiner::visitSRA(SDNode *N) {
10345 SDValue N0 = N->getOperand(0);
10346 SDValue N1 = N->getOperand(1);
10347 if (SDValue V = DAG.simplifyShift(N0, N1))
10348 return V;
10349
10350 SDLoc DL(N);
10351 EVT VT = N0.getValueType();
10352 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10353
10354 // fold (sra c1, c2) -> c1 >>s c2
10355 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10356 return C;
10357
10358 // Arithmetic shifting an all-sign-bit value is a no-op.
10359 // fold (sra 0, x) -> 0
10360 // fold (sra -1, x) -> -1
10361 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10362 return N0;
10363
10364 // fold vector ops
10365 if (VT.isVector())
10366 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10367 return FoldedVOp;
10368
10369 if (SDValue NewSel = foldBinOpIntoSelect(N))
10370 return NewSel;
10371
10372 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10373
10374 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10375 // clamp (add c1, c2) to max shift.
10376 if (N0.getOpcode() == ISD::SRA) {
10377 EVT ShiftVT = N1.getValueType();
10378 EVT ShiftSVT = ShiftVT.getScalarType();
10379 SmallVector<SDValue, 16> ShiftValues;
10380
10381 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10382 APInt c1 = LHS->getAPIntValue();
10383 APInt c2 = RHS->getAPIntValue();
10384 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10385 APInt Sum = c1 + c2;
10386 unsigned ShiftSum =
10387 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10388 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10389 return true;
10390 };
10391 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10392 SDValue ShiftValue;
10393 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10394 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10395 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10396 assert(ShiftValues.size() == 1 &&
10397 "Expected matchBinaryPredicate to return one element for "
10398 "SPLAT_VECTORs");
10399 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10400 } else
10401 ShiftValue = ShiftValues[0];
10402 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10403 }
10404 }
10405
10406 // fold (sra (shl X, m), (sub result_size, n))
10407 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10408 // result_size - n != m.
10409 // If truncate is free for the target, sext(shl) is likely to result in better
10410 // code.
10411 if (N0.getOpcode() == ISD::SHL && N1C) {
10412 // Get the two constants of the shifts, CN0 = m, CN = n.
10413 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10414 if (N01C) {
10415 LLVMContext &Ctx = *DAG.getContext();
10416 // Determine what the truncate's result bitsize and type would be.
10417 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10418
10419 if (VT.isVector())
10420 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10421
10422 // Determine the residual right-shift amount.
10423 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10424
10425 // If the shift is not a no-op (in which case this should be just a sign
10426 // extend already), the type to truncate to is legal, sign_extend is legal
10427 // on that type, and the truncate to that type is both legal and free,
10428 // perform the transform.
10429 if ((ShiftAmt > 0) &&
10432 TLI.isTruncateFree(VT, TruncVT)) {
10433 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10435 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10436 N0.getOperand(0), Amt);
10437 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10438 Shift);
10439 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10440 N->getValueType(0), Trunc);
10441 }
10442 }
10443 }
10444
10445 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10446 // sra (add (shl X, N1C), AddC), N1C -->
10447 // sext (add (trunc X to (width - N1C)), AddC')
10448 // sra (sub AddC, (shl X, N1C)), N1C -->
10449 // sext (sub AddC1',(trunc X to (width - N1C)))
10450 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10451 N0.hasOneUse()) {
10452 bool IsAdd = N0.getOpcode() == ISD::ADD;
10453 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10454 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10455 Shl.hasOneUse()) {
10456 // TODO: AddC does not need to be a splat.
10457 if (ConstantSDNode *AddC =
10458 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10459 // Determine what the truncate's type would be and ask the target if
10460 // that is a free operation.
10461 LLVMContext &Ctx = *DAG.getContext();
10462 unsigned ShiftAmt = N1C->getZExtValue();
10463 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10464 if (VT.isVector())
10465 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10466
10467 // TODO: The simple type check probably belongs in the default hook
10468 // implementation and/or target-specific overrides (because
10469 // non-simple types likely require masking when legalized), but
10470 // that restriction may conflict with other transforms.
10471 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10472 TLI.isTruncateFree(VT, TruncVT)) {
10473 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10474 SDValue ShiftC =
10475 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10476 TruncVT.getScalarSizeInBits()),
10477 DL, TruncVT);
10478 SDValue Add;
10479 if (IsAdd)
10480 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10481 else
10482 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10483 return DAG.getSExtOrTrunc(Add, DL, VT);
10484 }
10485 }
10486 }
10487 }
10488
10489 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10490 if (N1.getOpcode() == ISD::TRUNCATE &&
10491 N1.getOperand(0).getOpcode() == ISD::AND) {
10492 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10493 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10494 }
10495
10496 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10497 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10498 // if c1 is equal to the number of bits the trunc removes
10499 // TODO - support non-uniform vector shift amounts.
10500 if (N0.getOpcode() == ISD::TRUNCATE &&
10501 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10502 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10503 N0.getOperand(0).hasOneUse() &&
10504 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10505 SDValue N0Op0 = N0.getOperand(0);
10506 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10507 EVT LargeVT = N0Op0.getValueType();
10508 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10509 if (LargeShift->getAPIntValue() == TruncBits) {
10510 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10511 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10512 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10513 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10514 SDValue SRA =
10515 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10516 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10517 }
10518 }
10519 }
10520
10521 // Simplify, based on bits shifted out of the LHS.
10522 if (SimplifyDemandedBits(SDValue(N, 0)))
10523 return SDValue(N, 0);
10524
10525 // If the sign bit is known to be zero, switch this to a SRL.
10526 if (DAG.SignBitIsZero(N0))
10527 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10528
10529 if (N1C && !N1C->isOpaque())
10530 if (SDValue NewSRA = visitShiftByConstant(N))
10531 return NewSRA;
10532
10533 // Try to transform this shift into a multiply-high if
10534 // it matches the appropriate pattern detected in combineShiftToMULH.
10535 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10536 return MULH;
10537
10538 // Attempt to convert a sra of a load into a narrower sign-extending load.
10539 if (SDValue NarrowLoad = reduceLoadWidth(N))
10540 return NarrowLoad;
10541
10542 return SDValue();
10543}
10544
10545SDValue DAGCombiner::visitSRL(SDNode *N) {
10546 SDValue N0 = N->getOperand(0);
10547 SDValue N1 = N->getOperand(1);
10548 if (SDValue V = DAG.simplifyShift(N0, N1))
10549 return V;
10550
10551 SDLoc DL(N);
10552 EVT VT = N0.getValueType();
10553 EVT ShiftVT = N1.getValueType();
10554 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10555
10556 // fold (srl c1, c2) -> c1 >>u c2
10557 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10558 return C;
10559
10560 // fold vector ops
10561 if (VT.isVector())
10562 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10563 return FoldedVOp;
10564
10565 if (SDValue NewSel = foldBinOpIntoSelect(N))
10566 return NewSel;
10567
10568 // if (srl x, c) is known to be zero, return 0
10569 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10570 if (N1C &&
10571 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10572 return DAG.getConstant(0, DL, VT);
10573
10574 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10575 if (N0.getOpcode() == ISD::SRL) {
10576 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10577 ConstantSDNode *RHS) {
10578 APInt c1 = LHS->getAPIntValue();
10579 APInt c2 = RHS->getAPIntValue();
10580 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10581 return (c1 + c2).uge(OpSizeInBits);
10582 };
10583 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10584 return DAG.getConstant(0, DL, VT);
10585
10586 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10587 ConstantSDNode *RHS) {
10588 APInt c1 = LHS->getAPIntValue();
10589 APInt c2 = RHS->getAPIntValue();
10590 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10591 return (c1 + c2).ult(OpSizeInBits);
10592 };
10593 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10594 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10595 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10596 }
10597 }
10598
10599 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10600 N0.getOperand(0).getOpcode() == ISD::SRL) {
10601 SDValue InnerShift = N0.getOperand(0);
10602 // TODO - support non-uniform vector shift amounts.
10603 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10604 uint64_t c1 = N001C->getZExtValue();
10605 uint64_t c2 = N1C->getZExtValue();
10606 EVT InnerShiftVT = InnerShift.getValueType();
10607 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10608 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10609 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10610 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10611 if (c1 + OpSizeInBits == InnerShiftSize) {
10612 if (c1 + c2 >= InnerShiftSize)
10613 return DAG.getConstant(0, DL, VT);
10614 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10615 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10616 InnerShift.getOperand(0), NewShiftAmt);
10617 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10618 }
10619 // In the more general case, we can clear the high bits after the shift:
10620 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10621 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10622 c1 + c2 < InnerShiftSize) {
10623 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10624 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10625 InnerShift.getOperand(0), NewShiftAmt);
10626 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10627 OpSizeInBits - c2),
10628 DL, InnerShiftVT);
10629 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10630 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10631 }
10632 }
10633 }
10634
10635 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10636 // (and (srl x, (sub c2, c1)), MASK)
10637 if (N0.getOpcode() == ISD::SHL &&
10638 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10639 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10640 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10641 ConstantSDNode *RHS) {
10642 const APInt &LHSC = LHS->getAPIntValue();
10643 const APInt &RHSC = RHS->getAPIntValue();
10644 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10645 LHSC.getZExtValue() <= RHSC.getZExtValue();
10646 };
10647 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10648 /*AllowUndefs*/ false,
10649 /*AllowTypeMismatch*/ true)) {
10650 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10651 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10652 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10653 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10654 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10655 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10656 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10657 }
10658 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10659 /*AllowUndefs*/ false,
10660 /*AllowTypeMismatch*/ true)) {
10661 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10662 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10663 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10664 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10665 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10666 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10667 }
10668 }
10669
10670 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10671 // TODO - support non-uniform vector shift amounts.
10672 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10673 // Shifting in all undef bits?
10674 EVT SmallVT = N0.getOperand(0).getValueType();
10675 unsigned BitSize = SmallVT.getScalarSizeInBits();
10676 if (N1C->getAPIntValue().uge(BitSize))
10677 return DAG.getUNDEF(VT);
10678
10679 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10680 uint64_t ShiftAmt = N1C->getZExtValue();
10681 SDLoc DL0(N0);
10682 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10683 N0.getOperand(0),
10684 DAG.getConstant(ShiftAmt, DL0,
10685 getShiftAmountTy(SmallVT)));
10686 AddToWorklist(SmallShift.getNode());
10687 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10688 return DAG.getNode(ISD::AND, DL, VT,
10689 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10690 DAG.getConstant(Mask, DL, VT));
10691 }
10692 }
10693
10694 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10695 // bit, which is unmodified by sra.
10696 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10697 if (N0.getOpcode() == ISD::SRA)
10698 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10699 }
10700
10701 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10702 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
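// Illustrative i32 case: if only bit 0 of x can be set, then ctlz(x) is 32
// when x == 0 and 31 when x == 1, so (srl (ctlz x), 5) == (xor x, 1), which
// is the SRL/XOR form produced below.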
10703 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10704 isPowerOf2_32(OpSizeInBits) &&
10705 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10706 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10707
10708 // If any of the input bits are KnownOne, then the input couldn't be all
10709 // zeros, thus the result of the srl will always be zero.
10710 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10711
10712 // If all of the bits input to the ctlz node are known to be zero, then
10713 // the result of the ctlz is "32" and the result of the shift is one.
10714 APInt UnknownBits = ~Known.Zero;
10715 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10716
10717 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10718 if (UnknownBits.isPowerOf2()) {
10719 // Okay, we know that only the single bit specified by UnknownBits
10720 // could be set on input to the CTLZ node. If this bit is set, the SRL
10721 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10722 // to an SRL/XOR pair, which is likely to simplify more.
10723 unsigned ShAmt = UnknownBits.countr_zero();
10724 SDValue Op = N0.getOperand(0);
10725
10726 if (ShAmt) {
10727 SDLoc DL(N0);
10728 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10729 DAG.getConstant(ShAmt, DL,
10730 getShiftAmountTy(Op.getValueType())));
10731 AddToWorklist(Op.getNode());
10732 }
10733 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10734 }
10735 }
10736
10737 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10738 if (N1.getOpcode() == ISD::TRUNCATE &&
10739 N1.getOperand(0).getOpcode() == ISD::AND) {
10740 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10741 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10742 }
10743
10744 // fold operands of srl based on knowledge that the low bits are not
10745 // demanded.
10746 if (SimplifyDemandedBits(SDValue(N, 0)))
10747 return SDValue(N, 0);
10748
10749 if (N1C && !N1C->isOpaque())
10750 if (SDValue NewSRL = visitShiftByConstant(N))
10751 return NewSRL;
10752
10753 // Attempt to convert a srl of a load into a narrower zero-extending load.
10754 if (SDValue NarrowLoad = reduceLoadWidth(N))
10755 return NarrowLoad;
10756
10757 // Here is a common situation. We want to optimize:
10758 //
10759 // %a = ...
10760 // %b = and i32 %a, 2
10761 // %c = srl i32 %b, 1
10762 // brcond i32 %c ...
10763 //
10764 // into
10765 //
10766 // %a = ...
10767 // %b = and %a, 2
10768 // %c = setcc eq %b, 0
10769 // brcond %c ...
10770 //
10771 // However, after the source operand of SRL is optimized into AND, the SRL
10772 // itself may not be optimized further. Look for it and add the BRCOND into
10773 // the worklist.
10774 //
10775 // This also tends to happen for binary operations when SimplifyDemandedBits
10776 // is involved.
10777 //
10778 // FIXME: This is unnecessary if we process the DAG in topological order,
10779 // which we plan to do. This workaround can be removed once the DAG is
10780 // processed in topological order.
10781 if (N->hasOneUse()) {
10782 SDNode *Use = *N->use_begin();
10783
10784 // Look past the truncate.
10785 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10786 Use = *Use->use_begin();
10787
10788 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10789 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10790 AddToWorklist(Use);
10791 }
10792
10793 // Try to transform this shift into a multiply-high if
10794 // it matches the appropriate pattern detected in combineShiftToMULH.
10795 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10796 return MULH;
10797
10798 return SDValue();
10799}
10800
10801SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10802 EVT VT = N->getValueType(0);
10803 SDValue N0 = N->getOperand(0);
10804 SDValue N1 = N->getOperand(1);
10805 SDValue N2 = N->getOperand(2);
10806 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10807 unsigned BitWidth = VT.getScalarSizeInBits();
10808 SDLoc DL(N);
10809
10810 // fold (fshl N0, N1, 0) -> N0
10811 // fold (fshr N0, N1, 0) -> N1
10812   if (isPowerOf2_32(BitWidth))
10813     if (DAG.MaskedValueIsZero(
10814 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10815 return IsFSHL ? N0 : N1;
10816
10817 auto IsUndefOrZero = [](SDValue V) {
10818 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10819 };
10820
10821 // TODO - support non-uniform vector shift amounts.
10822 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10823 EVT ShAmtTy = N2.getValueType();
10824
10825 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
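    // For example, for i8: (fshl x, y, 11) -> (fshl x, y, 3), since funnel
    // shift amounts are taken modulo the bit width.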
10826 if (Cst->getAPIntValue().uge(BitWidth)) {
10827 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10828 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10829 DAG.getConstant(RotAmt, DL, ShAmtTy));
10830 }
10831
10832 unsigned ShAmt = Cst->getZExtValue();
10833 if (ShAmt == 0)
10834 return IsFSHL ? N0 : N1;
10835
10836 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10837 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10838 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10839 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
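    // For example, for i8: (fshl 0, y, 3) is ((0 << 3) | (y >> 5)) -> (srl y, 5),
    // and (fshl x, 0, 3) is ((x << 3) | 0) -> (shl x, 3).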
10840 if (IsUndefOrZero(N0))
10841 return DAG.getNode(
10842 ISD::SRL, DL, VT, N1,
10843 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10844 if (IsUndefOrZero(N1))
10845 return DAG.getNode(
10846 ISD::SHL, DL, VT, N0,
10847 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10848
10849 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10850 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10851 // TODO - bigendian support once we have test coverage.
10852     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10853 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10854 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10855 !DAG.getDataLayout().isBigEndian()) {
10856 auto *LHS = dyn_cast<LoadSDNode>(N0);
10857 auto *RHS = dyn_cast<LoadSDNode>(N1);
10858 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10859 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10860 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10861 ISD::isNON_EXTLoad(LHS)) {
10862 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10863 SDLoc DL(RHS);
10864 uint64_t PtrOff =
10865 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10866 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10867 unsigned Fast = 0;
10868 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10869 RHS->getAddressSpace(), NewAlign,
10870 RHS->getMemOperand()->getFlags(), &Fast) &&
10871 Fast) {
10872 SDValue NewPtr = DAG.getMemBasePlusOffset(
10873 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10874 AddToWorklist(NewPtr.getNode());
10875 SDValue Load = DAG.getLoad(
10876 VT, DL, RHS->getChain(), NewPtr,
10877 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10878 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10879 // Replace the old load's chain with the new load's chain.
10880 WorklistRemover DeadNodes(*this);
10881 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10882 return Load;
10883 }
10884 }
10885 }
10886 }
10887 }
10888
10889 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10890 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10891   // iff we know the shift amount is in range.
10892 // TODO: when is it worth doing SUB(BW, N2) as well?
10893 if (isPowerOf2_32(BitWidth)) {
10894 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10895 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10896 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
10897 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10898 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
10899 }
10900
10901 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10902 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10903 // TODO: Investigate flipping this rotate if only one is legal.
10904 // If funnel shift is legal as well we might be better off avoiding
10905 // non-constant (BW - N2).
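  // For example, (fshl x, x, c) is ((x << c) | (x >> (BW - c))), i.e. a rotate
  // left by c.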
10906 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10907 if (N0 == N1 && hasOperation(RotOpc, VT))
10908 return DAG.getNode(RotOpc, DL, VT, N0, N2);
10909
10910 // Simplify, based on bits shifted out of N0/N1.
10911   if (SimplifyDemandedBits(SDValue(N, 0)))
10912     return SDValue(N, 0);
10913
10914 return SDValue();
10915}
10916
10917SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10918 SDValue N0 = N->getOperand(0);
10919 SDValue N1 = N->getOperand(1);
10920 if (SDValue V = DAG.simplifyShift(N0, N1))
10921 return V;
10922
10923 SDLoc DL(N);
10924 EVT VT = N0.getValueType();
10925
10926 // fold (*shlsat c1, c2) -> c1<<c2
10927 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10928 return C;
10929
10931   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10932 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10933 // fold (sshlsat x, c) -> (shl x, c)
10934 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10935 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10936 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10937
10938 // fold (ushlsat x, c) -> (shl x, c)
10939 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10940 N1C->getAPIntValue().ule(
10941             DAG.computeKnownBits(N0).countMinLeadingZeros()))
10942       return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10943 }
10944
10945 return SDValue();
10946}
10947
10948// Given a ABS node, detect the following patterns:
10949// (ABS (SUB (EXTEND a), (EXTEND b))).
10950// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10951// Generates UABD/SABD instruction.
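// For example, with i8 inputs the absolute difference is at most 255
// (|127 - (-128)| in the signed case), so it always fits back in the narrow
// type and only needs a zero-extension to the wider result.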
10952SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10953 EVT SrcVT = N->getValueType(0);
10954
10955 if (N->getOpcode() == ISD::TRUNCATE)
10956 N = N->getOperand(0).getNode();
10957
10958 if (N->getOpcode() != ISD::ABS)
10959 return SDValue();
10960
10961 EVT VT = N->getValueType(0);
10962 SDValue AbsOp1 = N->getOperand(0);
10963 SDValue Op0, Op1;
10964
10965 if (AbsOp1.getOpcode() != ISD::SUB)
10966 return SDValue();
10967
10968 Op0 = AbsOp1.getOperand(0);
10969 Op1 = AbsOp1.getOperand(1);
10970
10971 unsigned Opc0 = Op0.getOpcode();
10972
10973 // Check if the operands of the sub are (zero|sign)-extended.
10974 // TODO: Should we use ValueTracking instead?
10975 if (Opc0 != Op1.getOpcode() ||
10976 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10977 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10978 // fold (abs (sub nsw x, y)) -> abds(x, y)
10979 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10980 TLI.preferABDSToABSWithNSW(VT)) {
10981 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10982 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10983 }
10984 return SDValue();
10985 }
10986
10987 EVT VT0, VT1;
10988 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10989 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10990 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10991 } else {
10992 VT0 = Op0.getOperand(0).getValueType();
10993 VT1 = Op1.getOperand(0).getValueType();
10994 }
10995 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10996
10997 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10998 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10999 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
11000 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
11001 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
11002 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
11003 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
11004 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
11005 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
11006 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11007 }
11008
11009 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
11010 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
11011 if (hasOperation(ABDOpcode, VT)) {
11012 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
11013 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
11014 }
11015
11016 return SDValue();
11017}
11018
11019SDValue DAGCombiner::visitABS(SDNode *N) {
11020 SDValue N0 = N->getOperand(0);
11021 EVT VT = N->getValueType(0);
11022 SDLoc DL(N);
11023
11024 // fold (abs c1) -> c2
11025 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
11026 return C;
11027 // fold (abs (abs x)) -> (abs x)
11028 if (N0.getOpcode() == ISD::ABS)
11029 return N0;
11030 // fold (abs x) -> x iff not-negative
11031 if (DAG.SignBitIsZero(N0))
11032 return N0;
11033
11034 if (SDValue ABD = foldABSToABD(N, DL))
11035 return ABD;
11036
11037 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
11038 // iff zero_extend/truncate are free.
11039 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
11040 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
11041 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
11042 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
11043 hasOperation(ISD::ABS, ExtVT)) {
11044 return DAG.getNode(
11045 ISD::ZERO_EXTEND, DL, VT,
11046 DAG.getNode(ISD::ABS, DL, ExtVT,
11047 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11048 }
11049 }
11050
11051 return SDValue();
11052}
11053
11054SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11055 SDValue N0 = N->getOperand(0);
11056 EVT VT = N->getValueType(0);
11057 SDLoc DL(N);
11058
11059 // fold (bswap c1) -> c2
11060 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11061 return C;
11062 // fold (bswap (bswap x)) -> x
11063 if (N0.getOpcode() == ISD::BSWAP)
11064 return N0.getOperand(0);
11065
11066 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11067 // isn't supported, it will be expanded to bswap followed by a manual reversal
11068 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11069 // the two bswaps if the bitreverse gets expanded.
11070 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11071 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11072 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11073 }
11074
11075 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11076   // iff c >= bw/2 (i.e. lower half is known zero)
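  // For example, for i32 and c == 16: bswap (shl x, 16) == zext (bswap (trunc
  // x to i16)), e.g. x = 0x0000ABCD gives 0x0000CDAB either way.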
11077 unsigned BW = VT.getScalarSizeInBits();
11078 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11079 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11080 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11081 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11082 ShAmt->getZExtValue() >= (BW / 2) &&
11083 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11084 TLI.isTruncateFree(VT, HalfVT) &&
11085 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11086 SDValue Res = N0.getOperand(0);
11087 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11088 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11089 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
11090 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11091 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11092 return DAG.getZExtOrTrunc(Res, DL, VT);
11093 }
11094 }
11095
11096 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11097 // inverse-shift-of-bswap:
11098 // bswap (X u<< C) --> (bswap X) u>> C
11099 // bswap (X u>> C) --> (bswap X) u<< C
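  // For example, for i32: bswap (X u<< 8) == (bswap X) u>> 8.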
11100 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11101 N0.hasOneUse()) {
11102 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11103 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11104 ShAmt->getZExtValue() % 8 == 0) {
11105 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11106 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11107 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11108 }
11109 }
11110
11111 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11112 return V;
11113
11114 return SDValue();
11115}
11116
11117SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11118 SDValue N0 = N->getOperand(0);
11119 EVT VT = N->getValueType(0);
11120 SDLoc DL(N);
11121
11122 // fold (bitreverse c1) -> c2
11123 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11124 return C;
11125
11126 // fold (bitreverse (bitreverse x)) -> x
11127 if (N0.getOpcode() == ISD::BITREVERSE)
11128 return N0.getOperand(0);
11129
11130 SDValue X, Y;
11131
11132 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11133 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11135 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11136
11137 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11138 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11140 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11141
11142 return SDValue();
11143}
11144
11145SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11146 SDValue N0 = N->getOperand(0);
11147 EVT VT = N->getValueType(0);
11148 SDLoc DL(N);
11149
11150 // fold (ctlz c1) -> c2
11151 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11152 return C;
11153
11154 // If the value is known never to be zero, switch to the undef version.
11155 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11156 if (DAG.isKnownNeverZero(N0))
11157 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11158
11159 return SDValue();
11160}
11161
11162SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11163 SDValue N0 = N->getOperand(0);
11164 EVT VT = N->getValueType(0);
11165 SDLoc DL(N);
11166
11167 // fold (ctlz_zero_undef c1) -> c2
11168 if (SDValue C =
11169           DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11170     return C;
11171 return SDValue();
11172}
11173
11174SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11175 SDValue N0 = N->getOperand(0);
11176 EVT VT = N->getValueType(0);
11177 SDLoc DL(N);
11178
11179 // fold (cttz c1) -> c2
11180 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11181 return C;
11182
11183 // If the value is known never to be zero, switch to the undef version.
11184 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11185 if (DAG.isKnownNeverZero(N0))
11186 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11187
11188 return SDValue();
11189}
11190
11191SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11192 SDValue N0 = N->getOperand(0);
11193 EVT VT = N->getValueType(0);
11194 SDLoc DL(N);
11195
11196 // fold (cttz_zero_undef c1) -> c2
11197 if (SDValue C =
11198           DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11199     return C;
11200 return SDValue();
11201}
11202
11203SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11204 SDValue N0 = N->getOperand(0);
11205 EVT VT = N->getValueType(0);
11206 unsigned NumBits = VT.getScalarSizeInBits();
11207 SDLoc DL(N);
11208
11209 // fold (ctpop c1) -> c2
11210 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11211 return C;
11212
11213 // If the source is being shifted, but doesn't affect any active bits,
11214 // then we can call CTPOP on the shift source directly.
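  // For example, ctpop(x >> 3) == ctpop(x) when the low 3 bits of x are known
  // zero, since the shift only discards bits that are already zero.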
11215 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11216 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11217 const APInt &Amt = AmtC->getAPIntValue();
11218 if (Amt.ult(NumBits)) {
11219 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11220 if ((N0.getOpcode() == ISD::SRL &&
11221 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11222 (N0.getOpcode() == ISD::SHL &&
11223 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11224 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11225 }
11226 }
11227 }
11228 }
11229
11230   // If the upper bits are known to be zero, then see if it's profitable to
11231 // only count the lower bits.
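  // For example, ctpop i64 x --> zext (ctpop i32 (trunc x)) when the top 32
  // bits of x are known zero.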
11232 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11233 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11234 if (hasOperation(ISD::CTPOP, HalfVT) &&
11235 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11236 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11237 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11238 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11239 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11240 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11241 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11242 }
11243 }
11244 }
11245
11246 return SDValue();
11247}
11248
11249 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11250                                          SDValue RHS, const SDNodeFlags Flags,
11251 const TargetLowering &TLI) {
11252 EVT VT = LHS.getValueType();
11253 if (!VT.isFloatingPoint())
11254 return false;
11255
11256 const TargetOptions &Options = DAG.getTarget().Options;
11257
11258 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11260 (Flags.hasNoNaNs() ||
11261 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11262}
11263
11264 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11265                                        SDValue RHS, SDValue True, SDValue False,
11266                                        ISD::CondCode CC,
11267 const TargetLowering &TLI,
11268 SelectionDAG &DAG) {
11269 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11270 switch (CC) {
11271 case ISD::SETOLT:
11272 case ISD::SETOLE:
11273 case ISD::SETLT:
11274 case ISD::SETLE:
11275 case ISD::SETULT:
11276 case ISD::SETULE: {
11277     // Since the operands are already known never to be NaN here, either fminnum
11278     // or fminnum_ieee is OK. Try the ieee version first, since fminnum is
11279     // expanded in terms of it.
11280 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11281 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11282 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11283
11284 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11285 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11286 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11287 return SDValue();
11288 }
11289 case ISD::SETOGT:
11290 case ISD::SETOGE:
11291 case ISD::SETGT:
11292 case ISD::SETGE:
11293 case ISD::SETUGT:
11294 case ISD::SETUGE: {
11295 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11296 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11297 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11298
11299 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11300 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11301 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11302 return SDValue();
11303 }
11304 default:
11305 return SDValue();
11306 }
11307}
11308
11309/// Generate Min/Max node
11310SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11311 SDValue RHS, SDValue True,
11312 SDValue False, ISD::CondCode CC) {
11313 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11314 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11315
11316 // If we can't directly match this, try to see if we can pull an fneg out of
11317 // the select.
11318   SDValue NegTrue = TLI.getCheaperNegatedExpression(
11319       True, DAG, LegalOperations, ForCodeSize);
11320 if (!NegTrue)
11321 return SDValue();
11322
11323 HandleSDNode NegTrueHandle(NegTrue);
11324
11325 // Try to unfold an fneg from the select if we are comparing the negated
11326 // constant.
11327 //
11328 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11329 //
11330 // TODO: Handle fabs
11331 if (LHS == NegTrue) {
11332 // If we can't directly match this, try to see if we can pull an fneg out of
11333 // the select.
11334     SDValue NegRHS = TLI.getCheaperNegatedExpression(
11335         RHS, DAG, LegalOperations, ForCodeSize);
11336 if (NegRHS) {
11337 HandleSDNode NegRHSHandle(NegRHS);
11338 if (NegRHS == False) {
11339 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11340 False, CC, TLI, DAG);
11341 if (Combined)
11342 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11343 }
11344 }
11345 }
11346
11347 return SDValue();
11348}
11349
11350/// If a (v)select has a condition value that is a sign-bit test, try to smear
11351/// the condition operand sign-bit across the value width and use it as a mask.
11352 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11353                                              SelectionDAG &DAG) {
11354 SDValue Cond = N->getOperand(0);
11355 SDValue C1 = N->getOperand(1);
11356 SDValue C2 = N->getOperand(2);
11357   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11358     return SDValue();
11359
11360 EVT VT = N->getValueType(0);
11361 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11362 VT != Cond.getOperand(0).getValueType())
11363 return SDValue();
11364
11365 // The inverted-condition + commuted-select variants of these patterns are
11366 // canonicalized to these forms in IR.
11367 SDValue X = Cond.getOperand(0);
11368 SDValue CondC = Cond.getOperand(1);
11369 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11370 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11371       isAllOnesOrAllOnesSplat(C2)) {
11372     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11373 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11374 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11375 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11376 }
11377 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11378 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11379 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11380 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11381 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11382 }
11383 return SDValue();
11384}
11385
11386 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11387                                                  const TargetLowering &TLI) {
11388 if (!TLI.convertSelectOfConstantsToMath(VT))
11389 return false;
11390
11391 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11392 return true;
11393   if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11394     return true;
11395
11396 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11397 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11398 return true;
11399 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11400 return true;
11401
11402 return false;
11403}
11404
11405SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11406 SDValue Cond = N->getOperand(0);
11407 SDValue N1 = N->getOperand(1);
11408 SDValue N2 = N->getOperand(2);
11409 EVT VT = N->getValueType(0);
11410 EVT CondVT = Cond.getValueType();
11411 SDLoc DL(N);
11412
11413 if (!VT.isInteger())
11414 return SDValue();
11415
11416 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11417 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11418 if (!C1 || !C2)
11419 return SDValue();
11420
11421 if (CondVT != MVT::i1 || LegalOperations) {
11422 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11423 // We can't do this reliably if integer based booleans have different contents
11424 // to floating point based booleans. This is because we can't tell whether we
11425 // have an integer-based boolean or a floating-point-based boolean unless we
11426 // can find the SETCC that produced it and inspect its operands. This is
11427 // fairly easy if C is the SETCC node, but it can potentially be
11428 // undiscoverable (or not reasonably discoverable). For example, it could be
11429 // in another basic block or it could require searching a complicated
11430 // expression.
11431 if (CondVT.isInteger() &&
11432         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11433             TargetLowering::ZeroOrOneBooleanContent &&
11434         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11435             TargetLowering::ZeroOrOneBooleanContent &&
11436 C1->isZero() && C2->isOne()) {
11437 SDValue NotCond =
11438 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11439 if (VT.bitsEq(CondVT))
11440 return NotCond;
11441 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11442 }
11443
11444 return SDValue();
11445 }
11446
11447 // Only do this before legalization to avoid conflicting with target-specific
11448 // transforms in the other direction (create a select from a zext/sext). There
11449 // is also a target-independent combine here in DAGCombiner in the other
11450 // direction for (select Cond, -1, 0) when the condition is not i1.
11451 assert(CondVT == MVT::i1 && !LegalOperations);
11452
11453 // select Cond, 1, 0 --> zext (Cond)
11454 if (C1->isOne() && C2->isZero())
11455 return DAG.getZExtOrTrunc(Cond, DL, VT);
11456
11457 // select Cond, -1, 0 --> sext (Cond)
11458 if (C1->isAllOnes() && C2->isZero())
11459 return DAG.getSExtOrTrunc(Cond, DL, VT);
11460
11461 // select Cond, 0, 1 --> zext (!Cond)
11462 if (C1->isZero() && C2->isOne()) {
11463 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11464 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11465 return NotCond;
11466 }
11467
11468 // select Cond, 0, -1 --> sext (!Cond)
11469 if (C1->isZero() && C2->isAllOnes()) {
11470 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11471 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11472 return NotCond;
11473 }
11474
11475 // Use a target hook because some targets may prefer to transform in the
11476 // other direction.
11477   if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11478     return SDValue();
11479
11480 // For any constants that differ by 1, we can transform the select into
11481 // an extend and add.
11482 const APInt &C1Val = C1->getAPIntValue();
11483 const APInt &C2Val = C2->getAPIntValue();
11484
11485 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
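  // For example, select Cond, 5, 4 --> add (zext Cond), 4.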
11486 if (C1Val - 1 == C2Val) {
11487 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11488 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11489 }
11490
11491 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11492 if (C1Val + 1 == C2Val) {
11493 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11494 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11495 }
11496
11497 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
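  // For example, select Cond, 8, 0 --> (zext Cond) << 3.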
11498 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11499 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11500 SDValue ShAmtC =
11501 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11502 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11503 }
11504
11505 // select Cond, -1, C --> or (sext Cond), C
11506 if (C1->isAllOnes()) {
11507 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11508 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11509 }
11510
11511 // select Cond, C, -1 --> or (sext (not Cond)), C
11512 if (C2->isAllOnes()) {
11513 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11514 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11515 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11516 }
11517
11518   if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11519     return V;
11520
11521 return SDValue();
11522}
11523
11524template <class MatchContextClass>
11525 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11526   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11527 N->getOpcode() == ISD::VP_SELECT) &&
11528 "Expected a (v)(vp.)select");
11529 SDValue Cond = N->getOperand(0);
11530 SDValue T = N->getOperand(1), F = N->getOperand(2);
11531 EVT VT = N->getValueType(0);
11532 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11533 MatchContextClass matcher(DAG, TLI, N);
11534
11535 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11536 return SDValue();
11537
11538 // select Cond, Cond, F --> or Cond, F
11539 // select Cond, 1, F --> or Cond, F
11540 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11541 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11542
11543 // select Cond, T, Cond --> and Cond, T
11544 // select Cond, T, 0 --> and Cond, T
11545 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11546 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11547
11548 // select Cond, T, 1 --> or (not Cond), T
11549 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11550 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11551 DAG.getAllOnesConstant(SDLoc(N), VT));
11552 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11553 }
11554
11555 // select Cond, 0, F --> and (not Cond), F
11556 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11557 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11558 DAG.getAllOnesConstant(SDLoc(N), VT));
11559 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11560 }
11561
11562 return SDValue();
11563}
11564
11565 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11566   SDValue N0 = N->getOperand(0);
11567 SDValue N1 = N->getOperand(1);
11568 SDValue N2 = N->getOperand(2);
11569 EVT VT = N->getValueType(0);
11570 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11571 return SDValue();
11572
11573 SDValue Cond0 = N0.getOperand(0);
11574 SDValue Cond1 = N0.getOperand(1);
11575 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11576 if (VT != Cond0.getValueType())
11577 return SDValue();
11578
11579 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11580 // compare is inverted from that pattern ("Cond0 s> -1").
11581 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11582 ; // This is the pattern we are looking for.
11583 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11584 std::swap(N1, N2);
11585 else
11586 return SDValue();
11587
11588 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11589 if (isNullOrNullSplat(N2)) {
11590 SDLoc DL(N);
11591 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11592 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11593 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11594 }
11595
11596 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11597 if (isAllOnesOrAllOnesSplat(N1)) {
11598 SDLoc DL(N);
11599 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11600 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11601 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11602 }
11603
11604 // If we have to invert the sign bit mask, only do that transform if the
11605 // target has a bitwise 'and not' instruction (the invert is free).
11606   // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11607 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11608 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11609 SDLoc DL(N);
11610 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11611 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11612 SDValue Not = DAG.getNOT(DL, Sra, VT);
11613 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11614 }
11615
11616 // TODO: There's another pattern in this family, but it may require
11617 // implementing hasOrNot() to check for profitability:
11618 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11619
11620 return SDValue();
11621}
11622
11623SDValue DAGCombiner::visitSELECT(SDNode *N) {
11624 SDValue N0 = N->getOperand(0);
11625 SDValue N1 = N->getOperand(1);
11626 SDValue N2 = N->getOperand(2);
11627 EVT VT = N->getValueType(0);
11628 EVT VT0 = N0.getValueType();
11629 SDLoc DL(N);
11630 SDNodeFlags Flags = N->getFlags();
11631
11632 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11633 return V;
11634
11635 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11636 return V;
11637
11638 // select (not Cond), N1, N2 -> select Cond, N2, N1
11639 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11640 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11641 SelectOp->setFlags(Flags);
11642 return SelectOp;
11643 }
11644
11645 if (SDValue V = foldSelectOfConstants(N))
11646 return V;
11647
11648 // If we can fold this based on the true/false value, do so.
11649 if (SimplifySelectOps(N, N1, N2))
11650 return SDValue(N, 0); // Don't revisit N.
11651
11652 if (VT0 == MVT::i1) {
11653 // The code in this block deals with the following 2 equivalences:
11654 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11655 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11656 // The target can specify its preferred form with the
11657 // shouldNormalizeToSelectSequence() callback. However we always transform
11658 // to the right anyway if we find the inner select exists in the DAG anyway
11659 // and we always transform to the left side if we know that we can further
11660 // optimize the combination of the conditions.
11661 bool normalizeToSequence =
11662         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11663     // select (and Cond0, Cond1), X, Y
11664 // -> select Cond0, (select Cond1, X, Y), Y
11665 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11666 SDValue Cond0 = N0->getOperand(0);
11667 SDValue Cond1 = N0->getOperand(1);
11668 SDValue InnerSelect =
11669 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11670 if (normalizeToSequence || !InnerSelect.use_empty())
11671 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11672 InnerSelect, N2, Flags);
11673 // Cleanup on failure.
11674 if (InnerSelect.use_empty())
11675 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11676 }
11677 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11678 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11679 SDValue Cond0 = N0->getOperand(0);
11680 SDValue Cond1 = N0->getOperand(1);
11681 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11682 Cond1, N1, N2, Flags);
11683 if (normalizeToSequence || !InnerSelect.use_empty())
11684 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11685 InnerSelect, Flags);
11686 // Cleanup on failure.
11687 if (InnerSelect.use_empty())
11688 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11689 }
11690
11691 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11692 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11693 SDValue N1_0 = N1->getOperand(0);
11694 SDValue N1_1 = N1->getOperand(1);
11695 SDValue N1_2 = N1->getOperand(2);
11696 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11697 // Create the actual and node if we can generate good code for it.
11698 if (!normalizeToSequence) {
11699 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11700 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11701 N2, Flags);
11702 }
11703 // Otherwise see if we can optimize the "and" to a better pattern.
11704 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11705 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11706 N2, Flags);
11707 }
11708 }
11709 }
11710 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11711 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11712 SDValue N2_0 = N2->getOperand(0);
11713 SDValue N2_1 = N2->getOperand(1);
11714 SDValue N2_2 = N2->getOperand(2);
11715 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11716 // Create the actual or node if we can generate good code for it.
11717 if (!normalizeToSequence) {
11718 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11719 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11720 N2_2, Flags);
11721 }
11722 // Otherwise see if we can optimize to a better pattern.
11723 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11724 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11725 N2_2, Flags);
11726 }
11727 }
11728 }
11729
11730 // Fold selects based on a setcc into other things, such as min/max/abs.
11731 if (N0.getOpcode() == ISD::SETCC) {
11732 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11733 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11734
11735 // select (fcmp lt x, y), x, y -> fminnum x, y
11736 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11737 //
11738 // This is OK if we don't care what happens if either operand is a NaN.
11739 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
11740 if (SDValue FMinMax =
11741 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11742 return FMinMax;
11743
11744 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11745 // This is conservatively limited to pre-legal-operations to give targets
11746 // a chance to reverse the transform if they want to do that. Also, it is
11747 // unlikely that the pattern would be formed late, so it's probably not
11748 // worth going through the other checks.
11749 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11750 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11751 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11752 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11753 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11754 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11755 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11756 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11757 //
11758 // The IR equivalent of this transform would have this form:
11759 // %a = add %x, C
11760 // %c = icmp ugt %x, ~C
11761 // %r = select %c, -1, %a
11762 // =>
11763 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11764 // %u0 = extractvalue %u, 0
11765 // %u1 = extractvalue %u, 1
11766 // %r = select %u1, -1, %u0
11767 SDVTList VTs = DAG.getVTList(VT, VT0);
11768 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11769 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11770 }
11771 }
11772
11773 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11774 (!LegalOperations &&
11775          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11776       // Any flags available in a select/setcc fold will be on the setcc as they
11777 // migrated from fcmp
11778 Flags = N0->getFlags();
11779 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11780 N2, N0.getOperand(2));
11781 SelectNode->setFlags(Flags);
11782 return SelectNode;
11783 }
11784
11785 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11786 return NewSel;
11787 }
11788
11789 if (!VT.isVector())
11790 if (SDValue BinOp = foldSelectOfBinops(N))
11791 return BinOp;
11792
11793 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11794 return R;
11795
11796 return SDValue();
11797}
11798
11799// This function assumes all the vselect's arguments are CONCAT_VECTOR
11800// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11801 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11802   SDLoc DL(N);
11803 SDValue Cond = N->getOperand(0);
11804 SDValue LHS = N->getOperand(1);
11805 SDValue RHS = N->getOperand(2);
11806 EVT VT = N->getValueType(0);
11807 int NumElems = VT.getVectorNumElements();
11808 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11809 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11810 Cond.getOpcode() == ISD::BUILD_VECTOR);
11811
11812 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
11813 // binary ones here.
11814 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11815 return SDValue();
11816
11817 // We're sure we have an even number of elements due to the
11818 // concat_vectors we have as arguments to vselect.
11819 // Skip BV elements until we find one that's not an UNDEF
11820   // After we find a non-UNDEF element, keep looping until we get to half the
11821 // length of the BV and see if all the non-undef nodes are the same.
11822 ConstantSDNode *BottomHalf = nullptr;
11823 for (int i = 0; i < NumElems / 2; ++i) {
11824 if (Cond->getOperand(i)->isUndef())
11825 continue;
11826
11827 if (BottomHalf == nullptr)
11828 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11829 else if (Cond->getOperand(i).getNode() != BottomHalf)
11830 return SDValue();
11831 }
11832
11833 // Do the same for the second half of the BuildVector
11834 ConstantSDNode *TopHalf = nullptr;
11835 for (int i = NumElems / 2; i < NumElems; ++i) {
11836 if (Cond->getOperand(i)->isUndef())
11837 continue;
11838
11839 if (TopHalf == nullptr)
11840 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11841 else if (Cond->getOperand(i).getNode() != TopHalf)
11842 return SDValue();
11843 }
11844
11845 assert(TopHalf && BottomHalf &&
11846 "One half of the selector was all UNDEFs and the other was all the "
11847 "same value. This should have been addressed before this function.");
11848 return DAG.getNode(
11849       ISD::CONCAT_VECTORS, DL, VT,
11850       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11851 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11852}
11853
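// Try to fold a uniform (splat) component of a gather/scatter index into the
// scalar base pointer, so the remaining vector index is simpler (possibly a
// zero splat).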
11854bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11855 SelectionDAG &DAG, const SDLoc &DL) {
11856
11857 // Only perform the transformation when existing operands can be reused.
11858 if (IndexIsScaled)
11859 return false;
11860
11861 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11862 return false;
11863
11864 EVT VT = BasePtr.getValueType();
11865
11866 if (SDValue SplatVal = DAG.getSplatValue(Index);
11867 SplatVal && !isNullConstant(SplatVal) &&
11868 SplatVal.getValueType() == VT) {
11869 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11870 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11871 return true;
11872 }
11873
11874 if (Index.getOpcode() != ISD::ADD)
11875 return false;
11876
11877 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11878 SplatVal && SplatVal.getValueType() == VT) {
11879 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11880 Index = Index.getOperand(1);
11881 return true;
11882 }
11883 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11884 SplatVal && SplatVal.getValueType() == VT) {
11885 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11886 Index = Index.getOperand(0);
11887 return true;
11888 }
11889 return false;
11890}
11891
11892// Fold sext/zext of index into index type.
11894 SelectionDAG &DAG) {
11895 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11896
11897 // It's always safe to look through zero extends.
11898 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11899 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11900 IndexType = ISD::UNSIGNED_SCALED;
11901 Index = Index.getOperand(0);
11902 return true;
11903 }
11904 if (ISD::isIndexTypeSigned(IndexType)) {
11905 IndexType = ISD::UNSIGNED_SCALED;
11906 return true;
11907 }
11908 }
11909
11910 // It's only safe to look through sign extends when Index is signed.
11911 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11912 ISD::isIndexTypeSigned(IndexType) &&
11913 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11914 Index = Index.getOperand(0);
11915 return true;
11916 }
11917
11918 return false;
11919}
11920
11921SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11922 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11923 SDValue Mask = MSC->getMask();
11924 SDValue Chain = MSC->getChain();
11925 SDValue Index = MSC->getIndex();
11926 SDValue Scale = MSC->getScale();
11927 SDValue StoreVal = MSC->getValue();
11928 SDValue BasePtr = MSC->getBasePtr();
11929 SDValue VL = MSC->getVectorLength();
11930 ISD::MemIndexType IndexType = MSC->getIndexType();
11931 SDLoc DL(N);
11932
11933 // Zap scatters with a zero mask.
11934   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11935     return Chain;
11936
11937 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11938 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11939 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11940 DL, Ops, MSC->getMemOperand(), IndexType);
11941 }
11942
11943 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11944 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11945 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11946 DL, Ops, MSC->getMemOperand(), IndexType);
11947 }
11948
11949 return SDValue();
11950}
11951
11952SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11953 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11954 SDValue Mask = MSC->getMask();
11955 SDValue Chain = MSC->getChain();
11956 SDValue Index = MSC->getIndex();
11957 SDValue Scale = MSC->getScale();
11958 SDValue StoreVal = MSC->getValue();
11959 SDValue BasePtr = MSC->getBasePtr();
11960 ISD::MemIndexType IndexType = MSC->getIndexType();
11961 SDLoc DL(N);
11962
11963 // Zap scatters with a zero mask.
11964   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11965     return Chain;
11966
11967 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11968 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11969 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11970 DL, Ops, MSC->getMemOperand(), IndexType,
11971 MSC->isTruncatingStore());
11972 }
11973
11974 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11975 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11976 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11977 DL, Ops, MSC->getMemOperand(), IndexType,
11978 MSC->isTruncatingStore());
11979 }
11980
11981 return SDValue();
11982}
11983
11984SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11985 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11986 SDValue Mask = MST->getMask();
11987 SDValue Chain = MST->getChain();
11988 SDValue Value = MST->getValue();
11989 SDValue Ptr = MST->getBasePtr();
11990 SDLoc DL(N);
11991
11992 // Zap masked stores with a zero mask.
11993   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11994     return Chain;
11995
11996 // Remove a masked store if base pointers and masks are equal.
11997 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11998 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11999 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
12000 !MST->getBasePtr().isUndef() &&
12001 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
12002 MST1->getMemoryVT().getStoreSize()) ||
12003          ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
12004         TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
12005 MST->getMemoryVT().getStoreSize())) {
12006 CombineTo(MST1, MST1->getChain());
12007 if (N->getOpcode() != ISD::DELETED_NODE)
12008 AddToWorklist(N);
12009 return SDValue(N, 0);
12010 }
12011 }
12012
12013   // If this is a masked store with an all-ones mask, we can use an unmasked store.
12014 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12015 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12016 !MST->isCompressingStore() && !MST->isTruncatingStore())
12017 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12018 MST->getBasePtr(), MST->getPointerInfo(),
12019 MST->getOriginalAlign(),
12020 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12021
12022 // Try transforming N to an indexed store.
12023 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12024 return SDValue(N, 0);
12025
12026 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12027 Value.getValueType().isInteger() &&
12028 (!isa<ConstantSDNode>(Value) ||
12029 !cast<ConstantSDNode>(Value)->isOpaque())) {
12030 APInt TruncDemandedBits =
12031 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12032                              MST->getMemoryVT().getScalarSizeInBits());
12033
12034 // See if we can simplify the operation with
12035 // SimplifyDemandedBits, which only works if the value has a single use.
12036 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12037 // Re-visit the store if anything changed and the store hasn't been merged
12038 // with another node (N is deleted) SimplifyDemandedBits will add Value's
12039 // node back to the worklist if necessary, but we also need to re-visit
12040 // the Store node itself.
12041 if (N->getOpcode() != ISD::DELETED_NODE)
12042 AddToWorklist(N);
12043 return SDValue(N, 0);
12044 }
12045 }
12046
12047 // If this is a TRUNC followed by a masked store, fold this into a masked
12048 // truncating store. We can do this even if this is already a masked
12049 // truncstore.
12050   // TODO: Try combine to masked compress store if possible.
12051 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12052 MST->isUnindexed() && !MST->isCompressingStore() &&
12053 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12054 MST->getMemoryVT(), LegalOperations)) {
12055 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12056 Value.getOperand(0).getValueType());
12057 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12058 MST->getOffset(), Mask, MST->getMemoryVT(),
12059 MST->getMemOperand(), MST->getAddressingMode(),
12060 /*IsTruncating=*/true);
12061 }
12062
12063 return SDValue();
12064}
12065
12066SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12067 auto *SST = cast<VPStridedStoreSDNode>(N);
12068 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12069 // Combine strided stores with unit-stride to a regular VP store.
12070 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12071 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12072 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12073 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12074 SST->getVectorLength(), SST->getMemoryVT(),
12075 SST->getMemOperand(), SST->getAddressingMode(),
12076 SST->isTruncatingStore(), SST->isCompressingStore());
12077 }
12078 return SDValue();
12079}
12080
12081SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12082 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12083 SDValue Mask = MGT->getMask();
12084 SDValue Chain = MGT->getChain();
12085 SDValue Index = MGT->getIndex();
12086 SDValue Scale = MGT->getScale();
12087 SDValue BasePtr = MGT->getBasePtr();
12088 SDValue VL = MGT->getVectorLength();
12089 ISD::MemIndexType IndexType = MGT->getIndexType();
12090 SDLoc DL(N);
12091
12092 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12093 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12094 return DAG.getGatherVP(
12095 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12096 Ops, MGT->getMemOperand(), IndexType);
12097 }
12098
12099 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12100 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12101 return DAG.getGatherVP(
12102 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12103 Ops, MGT->getMemOperand(), IndexType);
12104 }
12105
12106 return SDValue();
12107}
12108
12109SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12110 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12111 SDValue Mask = MGT->getMask();
12112 SDValue Chain = MGT->getChain();
12113 SDValue Index = MGT->getIndex();
12114 SDValue Scale = MGT->getScale();
12115 SDValue PassThru = MGT->getPassThru();
12116 SDValue BasePtr = MGT->getBasePtr();
12117 ISD::MemIndexType IndexType = MGT->getIndexType();
12118 SDLoc DL(N);
12119
12120 // Zap gathers with a zero mask.
12121   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12122     return CombineTo(N, PassThru, MGT->getChain());
12123
12124 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12125 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12126 return DAG.getMaskedGather(
12127 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12128 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12129 }
12130
12131 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12132 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12133 return DAG.getMaskedGather(
12134 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12135 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12136 }
12137
12138 return SDValue();
12139}
12140
12141SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12142 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12143 SDValue Mask = MLD->getMask();
12144 SDLoc DL(N);
12145
12146 // Zap masked loads with a zero mask.
12147   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12148     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12149
12150   // If this is a masked load with an all-ones mask, we can use an unmasked load.
12151 // FIXME: Can we do this for indexed, expanding, or extending loads?
12152 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12153 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12154 SDValue NewLd = DAG.getLoad(
12155 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12156 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12157 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12158 return CombineTo(N, NewLd, NewLd.getValue(1));
12159 }
12160
12161 // Try transforming N to an indexed load.
12162 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12163 return SDValue(N, 0);
12164
12165 return SDValue();
12166}
12167
12168SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12169 auto *SLD = cast<VPStridedLoadSDNode>(N);
12170 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12171 // Combine strided loads with unit-stride to a regular VP load.
12172 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12173 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12174 SDValue NewLd = DAG.getLoadVP(
12175 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12176 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12177 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12178 SLD->getMemOperand(), SLD->isExpandingLoad());
12179 return CombineTo(N, NewLd, NewLd.getValue(1));
12180 }
12181 return SDValue();
12182}
12183
12184/// A vector select of 2 constant vectors can be simplified to math/logic to
12185/// avoid a variable select instruction and possibly avoid constant loads.
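/// For example, vselect Cond, <4,4,4,4>, <3,3,3,3> becomes
/// add (zext Cond), <3,3,3,3>.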
12186SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12187 SDValue Cond = N->getOperand(0);
12188 SDValue N1 = N->getOperand(1);
12189 SDValue N2 = N->getOperand(2);
12190 EVT VT = N->getValueType(0);
12191 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12192       !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12193       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12194       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12195     return SDValue();
12196
12197 // Check if we can use the condition value to increment/decrement a single
12198 // constant value. This simplifies a select to an add and removes a constant
12199 // load/materialization from the general case.
12200 bool AllAddOne = true;
12201 bool AllSubOne = true;
12202 unsigned Elts = VT.getVectorNumElements();
12203 for (unsigned i = 0; i != Elts; ++i) {
12204 SDValue N1Elt = N1.getOperand(i);
12205 SDValue N2Elt = N2.getOperand(i);
12206 if (N1Elt.isUndef() || N2Elt.isUndef())
12207 continue;
12208 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12209 AllAddOne = false;
12210 AllSubOne = false;
12211 break;
12212 }
12213
12214 const APInt &C1 = N1Elt->getAsAPIntVal();
12215 const APInt &C2 = N2Elt->getAsAPIntVal();
12216 if (C1 != C2 + 1)
12217 AllAddOne = false;
12218 if (C1 != C2 - 1)
12219 AllSubOne = false;
12220 }
12221
12222 // Further simplifications for the extra-special cases where the constants are
12223 // all 0 or all -1 should be implemented as folds of these patterns.
12224 SDLoc DL(N);
12225 if (AllAddOne || AllSubOne) {
12226 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12227 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12228 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12229 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12230 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12231 }
12232
12233 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12234 APInt Pow2C;
12235 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12236 isNullOrNullSplat(N2)) {
12237 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12238 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12239 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12240 }
12241
12242   if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12243     return V;
12244
12245 // The general case for select-of-constants:
12246 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12247 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12248 // leave that to a machine-specific pass.
12249 return SDValue();
12250}
12251
12252SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12253 SDValue N0 = N->getOperand(0);
12254 SDValue N1 = N->getOperand(1);
12255 SDValue N2 = N->getOperand(2);
12256
12257 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12258 return V;
12259
12260 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12261 return V;
12262
12263 return SDValue();
12264}
12265
12266SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12267 SDValue N0 = N->getOperand(0);
12268 SDValue N1 = N->getOperand(1);
12269 SDValue N2 = N->getOperand(2);
12270 EVT VT = N->getValueType(0);
12271 SDLoc DL(N);
12272
12273 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12274 return V;
12275
12276 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12277 return V;
12278
12279 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12280 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12281 return DAG.getSelect(DL, VT, F, N2, N1);
12282
12283 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12284 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12287       TLI.getBooleanContents(N0.getValueType()) ==
12288           TargetLowering::ZeroOrNegativeOneBooleanContent) {
12289     return DAG.getNode(
12290 ISD::ADD, DL, N1.getValueType(), N2,
12291 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12292 }
12293
12294 // Canonicalize integer abs.
12295 // vselect (setg[te] X, 0), X, -X ->
12296 // vselect (setgt X, -1), X, -X ->
12297 // vselect (setl[te] X, 0), -X, X ->
12298 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
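  // For example, for X = -5: Y = -1, the add gives -6, and -6 ^ -1 == 5.
  // For X >= 0, Y = 0 and the value is unchanged.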
12299 if (N0.getOpcode() == ISD::SETCC) {
12300 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12301 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12302 bool isAbs = false;
12303 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12304
12305 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12306 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12307 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12309 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12310 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12312
12313 if (isAbs) {
12315 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12316
12317 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12318 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12319 DL, getShiftAmountTy(VT)));
12320 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12321 AddToWorklist(Shift.getNode());
12322 AddToWorklist(Add.getNode());
12323 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12324 }
12325
12326 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12327 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12328 //
12329 // This is OK if we don't care about what happens if either operand is a
12330 // NaN.
12331 //
12332 if (N0.hasOneUse() &&
12333 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12334 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12335 return FMinMax;
12336 }
12337
12338 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12339 return S;
12340 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12341 return S;
12342
12343 // If this select has a condition (setcc) with narrower operands than the
12344 // select, try to widen the compare to match the select width.
12345 // TODO: This should be extended to handle any constant.
12346 // TODO: This could be extended to handle non-loading patterns, but that
12347 // requires thorough testing to avoid regressions.
12348 if (isNullOrNullSplat(RHS)) {
12349 EVT NarrowVT = LHS.getValueType();
12351 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12352 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12353 unsigned WideWidth = WideVT.getScalarSizeInBits();
12354 bool IsSigned = isSignedIntSetCC(CC);
12355 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12356 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12357 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12358 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12359 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12360 // Both compare operands can be widened for free. The LHS can use an
12361 // extended load, and the RHS is a constant:
12362 // vselect (ext (setcc load(X), C)), N1, N2 -->
12363 // vselect (setcc extload(X), C'), N1, N2
12364 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12365 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12366 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12367 EVT WideSetCCVT = getSetCCResultType(WideVT);
12368 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12369 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12370 }
12371 }
12372
12373 // Match VSELECTs with absolute difference patterns.
12374 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12375 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12376 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12377 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12378 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12379 N1.getOperand(0) == N2.getOperand(1) &&
12380 N1.getOperand(1) == N2.getOperand(0)) {
12381 bool IsSigned = isSignedIntSetCC(CC);
12382 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12383 if (hasOperation(ABDOpc, VT)) {
12384 switch (CC) {
12385 case ISD::SETGT:
12386 case ISD::SETGE:
12387 case ISD::SETUGT:
12388 case ISD::SETUGE:
12389 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12390 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12391 break;
12392 case ISD::SETLT:
12393 case ISD::SETLE:
12394 case ISD::SETULT:
12395 case ISD::SETULE:
12396 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12397 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12398 break;
12399 default:
12400 break;
12401 }
12402 }
12403 }
12404
12405 // Match VSELECTs into add with unsigned saturation.
12406 if (hasOperation(ISD::UADDSAT, VT)) {
12407 // Check if one of the arms of the VSELECT is a vector with all bits set.
12408 // If it's on the left side, invert the predicate to simplify the logic below.
12409 SDValue Other;
12410 ISD::CondCode SatCC = CC;
12412 Other = N2;
12413 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12414 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12415 Other = N1;
12416 }
12417
12418 if (Other && Other.getOpcode() == ISD::ADD) {
12419 SDValue CondLHS = LHS, CondRHS = RHS;
12420 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12421
12422 // Canonicalize condition operands.
12423 if (SatCC == ISD::SETUGE) {
12424 std::swap(CondLHS, CondRHS);
12425 SatCC = ISD::SETULE;
12426 }
12427
12428 // We can test against either of the addition operands.
12429 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12430 // x+y >= x ? x+y : ~0 --> uaddsat x, y
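// E.g. with i8 lanes, x = 200 and y = 100: x+y wraps to 44, the unsigned
// compare x <= x+y fails, and the select yields ~0 (255), which is exactly
// uaddsat(200, 100).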
12431 if (SatCC == ISD::SETULE && Other == CondRHS &&
12432 (OpLHS == CondLHS || OpRHS == CondLHS))
12433 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12434
12435 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12436 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12437 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12438 CondLHS == OpLHS) {
12439 // If the RHS is a constant we have to reverse the const
12440 // canonicalization.
12441 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12442 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12443 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12444 };
12445 if (SatCC == ISD::SETULE &&
12446 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12447 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12448 }
12449 }
12450 }
12451
12452 // Match VSELECTs into sub with unsigned saturation.
12453 if (hasOperation(ISD::USUBSAT, VT)) {
12454 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12455 // the left side, invert the predicate to simplify the logic below.
12456 SDValue Other;
12457 ISD::CondCode SatCC = CC;
12459 Other = N2;
12460 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12462 Other = N1;
12463 }
12464
12465 // zext(x) >= y ? trunc(zext(x) - y) : 0
12466 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12467 // zext(x) > y ? trunc(zext(x) - y) : 0
12468 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12469 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12470 Other.getOperand(0).getOpcode() == ISD::SUB &&
12471 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12472 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12473 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12474 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12475 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12476 DAG, DL))
12477 return R;
12478 }
12479
12480 if (Other && Other.getNumOperands() == 2) {
12481 SDValue CondRHS = RHS;
12482 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12483
12484 if (OpLHS == LHS) {
12485 // Look for a general sub with unsigned saturation first.
12486 // x >= y ? x-y : 0 --> usubsat x, y
12487 // x > y ? x-y : 0 --> usubsat x, y
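// E.g. with i8 lanes, x = 10 and y = 30: the compare fails, the select
// yields 0, and usubsat(10, 30) is also 0; when x >= y the subtraction
// cannot wrap, so both forms agree.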
12488 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12489 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12490 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12491
12492 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12493 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12494 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12495 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12496 // If the RHS is a constant we have to reverse the const
12497 // canonicalization.
12498 // x > C-1 ? x+-C : 0 --> usubsat x, C
12499 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12500 return (!Op && !Cond) ||
12501 (Op && Cond &&
12502 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12503 };
12504 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12505 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12506 /*AllowUndefs*/ true)) {
12507 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12508 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12509 }
12510
12511 // Another special case: If C was a sign bit, the sub has been
12512 // canonicalized into a xor.
12513 // FIXME: Would it be better to use computeKnownBits to
12514 // determine whether it's safe to decanonicalize the xor?
12515 // x s< 0 ? x^C : 0 --> usubsat x, C
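// E.g. for i8 with C = 0x80: x s< 0 is the same as x u>= 0x80, and for
// those x the xor clears the sign bit, i.e. x ^ 0x80 == x - 0x80, which is
// what usubsat x, 0x80 computes (and 0 otherwise).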
12516 APInt SplatValue;
12517 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12518 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12520 SplatValue.isSignMask()) {
12521 // Note that we have to rebuild the RHS constant here to
12522 // ensure we don't rely on particular values of undef lanes.
12523 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12524 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12525 }
12526 }
12527 }
12528 }
12529 }
12530 }
12531 }
12532
12533 if (SimplifySelectOps(N, N1, N2))
12534 return SDValue(N, 0); // Don't revisit N.
12535
12536 // Fold (vselect all_ones, N1, N2) -> N1
12538 return N1;
12539 // Fold (vselect all_zeros, N1, N2) -> N2
12541 return N2;
12542
12543 // The ConvertSelectToConcatVector function assumes both of the above
12544 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12545 // and addressed.
12546 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12549 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12550 return CV;
12551 }
12552
12553 if (SDValue V = foldVSelectOfConstants(N))
12554 return V;
12555
12556 if (hasOperation(ISD::SRA, VT))
12558 return V;
12559
12561 return SDValue(N, 0);
12562
12563 return SDValue();
12564}
12565
12566SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12567 SDValue N0 = N->getOperand(0);
12568 SDValue N1 = N->getOperand(1);
12569 SDValue N2 = N->getOperand(2);
12570 SDValue N3 = N->getOperand(3);
12571 SDValue N4 = N->getOperand(4);
12572 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12573
12574 // fold select_cc lhs, rhs, x, x, cc -> x
12575 if (N2 == N3)
12576 return N2;
12577
12578 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12579 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12580 isNullConstant(N1))
12581 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12582
12583 // Determine if the condition we're dealing with is constant
12584 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12585 CC, SDLoc(N), false)) {
12586 AddToWorklist(SCC.getNode());
12587
12588 // cond always true -> true val
12589 // cond always false -> false val
12590 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12591 return SCCC->isZero() ? N3 : N2;
12592
12593 // When the condition is UNDEF, just return the first operand. This is
12594 // consistent with DAG creation: no setcc node is created in this case.
12595 if (SCC->isUndef())
12596 return N2;
12597
12598 // Fold to a simpler select_cc
12599 if (SCC.getOpcode() == ISD::SETCC) {
12600 SDValue SelectOp = DAG.getNode(
12601 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12602 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12603 SelectOp->setFlags(SCC->getFlags());
12604 return SelectOp;
12605 }
12606 }
12607
12608 // If we can fold this based on the true/false value, do so.
12609 if (SimplifySelectOps(N, N2, N3))
12610 return SDValue(N, 0); // Don't revisit N.
12611
12612 // fold select_cc into other things, such as min/max/abs
12613 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12614}
12615
12616SDValue DAGCombiner::visitSETCC(SDNode *N) {
12617 // setcc is very commonly used as an argument to brcond. This pattern
12618 // also lends itself to numerous combines and, as a result, it is desirable
12619 // to keep the argument to a brcond as a setcc as much as possible.
12620 bool PreferSetCC =
12621 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12622
12623 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12624 EVT VT = N->getValueType(0);
12625 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12626
12627 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12628
12629 if (Combined) {
12630 // If we prefer to have a setcc, and we don't, we'll try our best to
12631 // recreate one using rebuildSetCC.
12632 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12633 SDValue NewSetCC = rebuildSetCC(Combined);
12634
12635 // We don't have anything interesting to combine to.
12636 if (NewSetCC.getNode() == N)
12637 return SDValue();
12638
12639 if (NewSetCC)
12640 return NewSetCC;
12641 }
12642 return Combined;
12643 }
12644
12645 // Optimize
12646 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12647 // or
12648 // 2) (icmp eq/ne X, (rotate X, C1))
12649 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12650 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12651 // Then:
12652 // If C1 is a power of 2, then the rotate and shift+and versions are
12653 // equivalent, so we can interchange them depending on target preference.
12654 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12655 // which in turn affects the constant C0. We can use this to get better
12656 // constants, again determined by target preference.
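// For example, on a 64-bit value, (x & 0xffffffff) == (x >> 32) holds
// exactly when the low and high halves match, i.e. when x == rot(x, 32),
// so either form can be used depending on what the target prefers.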
12657 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12658 auto IsAndWithShift = [](SDValue A, SDValue B) {
12659 return A.getOpcode() == ISD::AND &&
12660 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12661 A.getOperand(0) == B.getOperand(0);
12662 };
12663 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12664 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12665 B.getOperand(0) == A;
12666 };
12667 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12668 bool IsRotate = false;
12669
12670 // Find either shift+and or rotate pattern.
12671 if (IsAndWithShift(N0, N1)) {
12672 AndOrOp = N0;
12673 ShiftOrRotate = N1;
12674 } else if (IsAndWithShift(N1, N0)) {
12675 AndOrOp = N1;
12676 ShiftOrRotate = N0;
12677 } else if (IsRotateWithOp(N0, N1)) {
12678 IsRotate = true;
12679 AndOrOp = N0;
12680 ShiftOrRotate = N1;
12681 } else if (IsRotateWithOp(N1, N0)) {
12682 IsRotate = true;
12683 AndOrOp = N1;
12684 ShiftOrRotate = N0;
12685 }
12686
12687 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12688 (IsRotate || AndOrOp.hasOneUse())) {
12689 EVT OpVT = N0.getValueType();
12690 // Get the constant shift/rotate amount and possibly the mask (if it's the
12691 // shift+and variant).
12692 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12693 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12694 /*AllowTrunc*/ false);
12695 if (CNode == nullptr)
12696 return std::nullopt;
12697 return CNode->getAPIntValue();
12698 };
12699 std::optional<APInt> AndCMask =
12700 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12701 std::optional<APInt> ShiftCAmt =
12702 GetAPIntValue(ShiftOrRotate.getOperand(1));
12703 unsigned NumBits = OpVT.getScalarSizeInBits();
12704
12705 // We found constants.
12706 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12707 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12708 // Check that the constants meet the constraints.
12709 bool CanTransform = IsRotate;
12710 if (!CanTransform) {
12711 // Check that the mask and shift complement each other
12712 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12713 // Check that we are comparing all bits
12714 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12715 // Check that the and mask is correct for the shift
12716 CanTransform &=
12717 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12718 }
12719
12720 // See if target prefers another shift/rotate opcode.
12721 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12722 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12723 // Transform is valid and we have a new preference.
12724 if (CanTransform && NewShiftOpc != ShiftOpc) {
12725 SDLoc DL(N);
12726 SDValue NewShiftOrRotate =
12727 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12728 ShiftOrRotate.getOperand(1));
12729 SDValue NewAndOrOp = SDValue();
12730
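// Swapping shl <-> srl moves the compared bits to the other end of the
// value, so the and-mask must be rebuilt: keep the high bits for shl and
// the low bits for srl. A rotate compares all bits and needs no mask.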
12731 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12732 APInt NewMask =
12733 NewShiftOpc == ISD::SHL
12734 ? APInt::getHighBitsSet(NumBits,
12735 NumBits - ShiftCAmt->getZExtValue())
12736 : APInt::getLowBitsSet(NumBits,
12737 NumBits - ShiftCAmt->getZExtValue());
12738 NewAndOrOp =
12739 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12740 DAG.getConstant(NewMask, DL, OpVT));
12741 } else {
12742 NewAndOrOp = ShiftOrRotate.getOperand(0);
12743 }
12744
12745 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12746 }
12747 }
12748 }
12749 }
12750 return SDValue();
12751}
12752
12753SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12754 SDValue LHS = N->getOperand(0);
12755 SDValue RHS = N->getOperand(1);
12756 SDValue Carry = N->getOperand(2);
12757 SDValue Cond = N->getOperand(3);
12758
12759 // If Carry is false, fold to a regular SETCC.
12760 if (isNullConstant(Carry))
12761 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12762
12763 return SDValue();
12764}
12765
12766/// Check if N satisfies:
12767/// N is used once.
12768/// N is a Load.
12769/// The load is compatible with ExtOpcode. This means that if the load has an
12770/// explicit zero/sign extension, ExtOpcode must perform the same kind of
12771/// extension.
12772/// Otherwise returns true.
12773static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12774 if (!N.hasOneUse())
12775 return false;
12776
12777 if (!isa<LoadSDNode>(N))
12778 return false;
12779
12780 LoadSDNode *Load = cast<LoadSDNode>(N);
12781 ISD::LoadExtType LoadExt = Load->getExtensionType();
12782 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12783 return true;
12784
12785 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12786 // extension.
12787 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12788 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12789 return false;
12790
12791 return true;
12792}
12793
12794/// Fold
12795/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12796/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12797/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12798/// This function is called by the DAGCombiner when visiting sext/zext/aext
12799/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12801 SelectionDAG &DAG,
12802 CombineLevel Level) {
12803 unsigned Opcode = N->getOpcode();
12804 SDValue N0 = N->getOperand(0);
12805 EVT VT = N->getValueType(0);
12806 SDLoc DL(N);
12807
12808 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12809 Opcode == ISD::ANY_EXTEND) &&
12810 "Expected EXTEND dag node in input!");
12811
12812 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12813 !N0.hasOneUse())
12814 return SDValue();
12815
12816 SDValue Op1 = N0->getOperand(1);
12817 SDValue Op2 = N0->getOperand(2);
12818 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12819 return SDValue();
12820
12821 auto ExtLoadOpcode = ISD::EXTLOAD;
12822 if (Opcode == ISD::SIGN_EXTEND)
12823 ExtLoadOpcode = ISD::SEXTLOAD;
12824 else if (Opcode == ISD::ZERO_EXTEND)
12825 ExtLoadOpcode = ISD::ZEXTLOAD;
12826
12827 // An illegal VSELECT may fail ISel if it occurs after legalization (DAG
12828 // Combine2), so we should conservatively check the OperationAction.
12829 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12830 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12831 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12832 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12833 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12835 return SDValue();
12836
12837 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12838 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12839 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12840}
12841
12842/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12843/// a build_vector of constants.
12844/// This function is called by the DAGCombiner when visiting sext/zext/aext
12845/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12846/// Vector extends are not folded if operations are legal; this is to
12847/// avoid introducing illegal build_vector dag nodes.
12849 const TargetLowering &TLI,
12850 SelectionDAG &DAG, bool LegalTypes) {
12851 unsigned Opcode = N->getOpcode();
12852 SDValue N0 = N->getOperand(0);
12853 EVT VT = N->getValueType(0);
12854
12855 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12856 "Expected EXTEND dag node in input!");
12857
12858 // fold (sext c1) -> c1
12859 // fold (zext c1) -> c1
12860 // fold (aext c1) -> c1
12861 if (isa<ConstantSDNode>(N0))
12862 return DAG.getNode(Opcode, DL, VT, N0);
12863
12864 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12865 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12866 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12867 if (N0->getOpcode() == ISD::SELECT) {
12868 SDValue Op1 = N0->getOperand(1);
12869 SDValue Op2 = N0->getOperand(2);
12870 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12871 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12872 // For any_extend, choose sign extension of the constants to allow a
12873 // possible further transform to sign_extend_inreg, i.e.:
12874 //
12875 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12876 // t2: i64 = any_extend t1
12877 // -->
12878 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12879 // -->
12880 // t4: i64 = sign_extend_inreg t3
12881 unsigned FoldOpc = Opcode;
12882 if (FoldOpc == ISD::ANY_EXTEND)
12883 FoldOpc = ISD::SIGN_EXTEND;
12884 return DAG.getSelect(DL, VT, N0->getOperand(0),
12885 DAG.getNode(FoldOpc, DL, VT, Op1),
12886 DAG.getNode(FoldOpc, DL, VT, Op2));
12887 }
12888 }
12889
12890 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
12891 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
12892 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
12893 EVT SVT = VT.getScalarType();
12894 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12896 return SDValue();
12897
12898 // We can fold this node into a build_vector.
12899 unsigned VTBits = SVT.getSizeInBits();
12900 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12902 unsigned NumElts = VT.getVectorNumElements();
12903
12904 for (unsigned i = 0; i != NumElts; ++i) {
12905 SDValue Op = N0.getOperand(i);
12906 if (Op.isUndef()) {
12907 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12908 Elts.push_back(DAG.getUNDEF(SVT));
12909 else
12910 Elts.push_back(DAG.getConstant(0, DL, SVT));
12911 continue;
12912 }
12913
12914 SDLoc DL(Op);
12915 // Get the constant value and if needed trunc it to the size of the type.
12916 // Nodes like build_vector might have constants wider than the scalar type.
12917 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12918 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12919 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12920 else
12921 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12922 }
12923
12924 return DAG.getBuildVector(VT, DL, Elts);
12925}
12926
12927// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
12928// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12929// transformation. Returns true if the extensions are possible and the
12930// above-mentioned transformation is profitable.
12932 unsigned ExtOpc,
12933 SmallVectorImpl<SDNode *> &ExtendNodes,
12934 const TargetLowering &TLI) {
12935 bool HasCopyToRegUses = false;
12936 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12937 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12938 ++UI) {
12939 SDNode *User = *UI;
12940 if (User == N)
12941 continue;
12942 if (UI.getUse().getResNo() != N0.getResNo())
12943 continue;
12944 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12945 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12946 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12947 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12948 // Sign bits will be lost after a zext.
12949 return false;
12950 bool Add = false;
12951 for (unsigned i = 0; i != 2; ++i) {
12952 SDValue UseOp = User->getOperand(i);
12953 if (UseOp == N0)
12954 continue;
12955 if (!isa<ConstantSDNode>(UseOp))
12956 return false;
12957 Add = true;
12958 }
12959 if (Add)
12960 ExtendNodes.push_back(User);
12961 continue;
12962 }
12963 // If truncates aren't free and there are users we can't
12964 // extend, it isn't worthwhile.
12965 if (!isTruncFree)
12966 return false;
12967 // Remember if this value is live-out.
12968 if (User->getOpcode() == ISD::CopyToReg)
12969 HasCopyToRegUses = true;
12970 }
12971
12972 if (HasCopyToRegUses) {
12973 bool BothLiveOut = false;
12974 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12975 UI != UE; ++UI) {
12976 SDUse &Use = UI.getUse();
12977 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12978 BothLiveOut = true;
12979 break;
12980 }
12981 }
12982 if (BothLiveOut)
12983 // Both unextended and extended values are live out. There had better be
12984 // a good reason for the transformation.
12985 return !ExtendNodes.empty();
12986 }
12987 return true;
12988}
12989
12990void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12991 SDValue OrigLoad, SDValue ExtLoad,
12992 ISD::NodeType ExtType) {
12993 // Extend SetCC uses if necessary.
12994 SDLoc DL(ExtLoad);
12995 for (SDNode *SetCC : SetCCs) {
12997
12998 for (unsigned j = 0; j != 2; ++j) {
12999 SDValue SOp = SetCC->getOperand(j);
13000 if (SOp == OrigLoad)
13001 Ops.push_back(ExtLoad);
13002 else
13003 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13004 }
13005
13006 Ops.push_back(SetCC->getOperand(2));
13007 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13008 }
13009}
13010
13011// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13012SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13013 SDValue N0 = N->getOperand(0);
13014 EVT DstVT = N->getValueType(0);
13015 EVT SrcVT = N0.getValueType();
13016
13017 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13018 N->getOpcode() == ISD::ZERO_EXTEND) &&
13019 "Unexpected node type (not an extend)!");
13020
13021 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13022 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13023 // (v8i32 (sext (v8i16 (load x))))
13024 // into:
13025 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13026 // (v4i32 (sextload (x + 16)))))
13027 // Where uses of the original load, i.e.:
13028 // (v8i16 (load x))
13029 // are replaced with:
13030 // (v8i16 (truncate
13031 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13032 // (v4i32 (sextload (x + 16)))))))
13033 //
13034 // This combine is only applicable to illegal, but splittable, vectors.
13035 // All legal types, and illegal non-vector types, are handled elsewhere.
13036 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13037 //
13038 if (N0->getOpcode() != ISD::LOAD)
13039 return SDValue();
13040
13041 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13042
13043 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13044 !N0.hasOneUse() || !LN0->isSimple() ||
13045 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13047 return SDValue();
13048
13050 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13051 return SDValue();
13052
13053 ISD::LoadExtType ExtType =
13054 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13055
13056 // Try to split the vector types to get down to legal types.
13057 EVT SplitSrcVT = SrcVT;
13058 EVT SplitDstVT = DstVT;
13059 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13060 SplitSrcVT.getVectorNumElements() > 1) {
13061 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13062 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13063 }
13064
13065 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13066 return SDValue();
13067
13068 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13069
13070 SDLoc DL(N);
13071 const unsigned NumSplits =
13072 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13073 const unsigned Stride = SplitSrcVT.getStoreSize();
13076
13077 SDValue BasePtr = LN0->getBasePtr();
13078 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13079 const unsigned Offset = Idx * Stride;
13080
13081 SDValue SplitLoad =
13082 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13083 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13084 SplitSrcVT, LN0->getOriginalAlign(),
13085 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13086
13087 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13088
13089 Loads.push_back(SplitLoad.getValue(0));
13090 Chains.push_back(SplitLoad.getValue(1));
13091 }
13092
13093 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13094 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13095
13096 // Simplify TF.
13097 AddToWorklist(NewChain.getNode());
13098
13099 CombineTo(N, NewValue);
13100
13101 // Replace uses of the original load (before extension)
13102 // with a truncate of the concatenated sextloaded vectors.
13103 SDValue Trunc =
13104 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13105 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13106 CombineTo(N0.getNode(), Trunc, NewChain);
13107 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13108}
13109
13110// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13111// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13112SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13113 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13114 EVT VT = N->getValueType(0);
13115 EVT OrigVT = N->getOperand(0).getValueType();
13116 if (TLI.isZExtFree(OrigVT, VT))
13117 return SDValue();
13118
13119 // and/or/xor
13120 SDValue N0 = N->getOperand(0);
13121 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13122 N0.getOperand(1).getOpcode() != ISD::Constant ||
13123 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13124 return SDValue();
13125
13126 // shl/shr
13127 SDValue N1 = N0->getOperand(0);
13128 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13129 N1.getOperand(1).getOpcode() != ISD::Constant ||
13130 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13131 return SDValue();
13132
13133 // load
13134 if (!isa<LoadSDNode>(N1.getOperand(0)))
13135 return SDValue();
13136 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13137 EVT MemVT = Load->getMemoryVT();
13138 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13139 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13140 return SDValue();
13141
13142
13143 // If the shift op is SHL, the logic op must be AND, otherwise the result
13144 // will be wrong.
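// An shl in the narrow type discards bits shifted past the top, but after
// widening those bits survive in the high part; an 'and' with the
// zero-extended constant clears them again, whereas or/xor would keep them.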
13145 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13146 return SDValue();
13147
13148 if (!N0.hasOneUse() || !N1.hasOneUse())
13149 return SDValue();
13150
13152 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13153 ISD::ZERO_EXTEND, SetCCs, TLI))
13154 return SDValue();
13155
13156 // Actually do the transformation.
13157 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13158 Load->getChain(), Load->getBasePtr(),
13159 Load->getMemoryVT(), Load->getMemOperand());
13160
13161 SDLoc DL1(N1);
13162 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13163 N1.getOperand(1));
13164
13166 SDLoc DL0(N0);
13167 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13168 DAG.getConstant(Mask, DL0, VT));
13169
13170 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13171 CombineTo(N, And);
13172 if (SDValue(Load, 0).hasOneUse()) {
13173 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13174 } else {
13175 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13176 Load->getValueType(0), ExtLoad);
13177 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13178 }
13179
13180 // N0 is dead at this point.
13181 recursivelyDeleteUnusedNodes(N0.getNode());
13182
13183 return SDValue(N,0); // Return N so it doesn't get rechecked!
13184}
13185
13186/// If we're narrowing or widening the result of a vector select and the final
13187/// size is the same size as a setcc (compare) feeding the select, then try to
13188/// apply the cast operation to the select's operands because matching vector
13189/// sizes for a select condition and other operands should be more efficient.
13190SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13191 unsigned CastOpcode = Cast->getOpcode();
13192 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13193 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13194 CastOpcode == ISD::FP_ROUND) &&
13195 "Unexpected opcode for vector select narrowing/widening");
13196
13197 // We only do this transform before legal ops because the pattern may be
13198 // obfuscated by target-specific operations after legalization. Do not create
13199 // an illegal select op, however, because that may be difficult to lower.
13200 EVT VT = Cast->getValueType(0);
13201 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13202 return SDValue();
13203
13204 SDValue VSel = Cast->getOperand(0);
13205 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13206 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13207 return SDValue();
13208
13209 // Does the setcc have the same vector size as the casted select?
13210 SDValue SetCC = VSel.getOperand(0);
13211 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13212 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13213 return SDValue();
13214
13215 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13216 SDValue A = VSel.getOperand(1);
13217 SDValue B = VSel.getOperand(2);
13218 SDValue CastA, CastB;
13219 SDLoc DL(Cast);
13220 if (CastOpcode == ISD::FP_ROUND) {
13221 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13222 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13223 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13224 } else {
13225 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13226 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13227 }
13228 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13229}
13230
13231// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13232// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13234 const TargetLowering &TLI, EVT VT,
13235 bool LegalOperations, SDNode *N,
13236 SDValue N0, ISD::LoadExtType ExtLoadType) {
13237 SDNode *N0Node = N0.getNode();
13238 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13239 : ISD::isZEXTLoad(N0Node);
13240 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13241 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13242 return SDValue();
13243
13244 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13245 EVT MemVT = LN0->getMemoryVT();
13246 if ((LegalOperations || !LN0->isSimple() ||
13247 VT.isVector()) &&
13248 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13249 return SDValue();
13250
13251 SDValue ExtLoad =
13252 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13253 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13254 Combiner.CombineTo(N, ExtLoad);
13255 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13256 if (LN0->use_empty())
13257 Combiner.recursivelyDeleteUnusedNodes(LN0);
13258 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13259}
13260
13261// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13262// Only generate vector extloads when 1) they're legal, and 2) they are
13263// deemed desirable by the target. NonNegZExt can be set to true if a zero
13264// extend has the nonneg flag to allow use of sextload if profitable.
13266 const TargetLowering &TLI, EVT VT,
13267 bool LegalOperations, SDNode *N, SDValue N0,
13268 ISD::LoadExtType ExtLoadType,
13269 ISD::NodeType ExtOpc,
13270 bool NonNegZExt = false) {
13272 return {};
13273
13274 // If this is zext nneg, see if it would make sense to treat it as a sext.
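// With the nneg flag the extended value is known non-negative, so sext and
// zext of the load produce the same result; a sextload may fold better with
// signed setcc users.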
13275 if (NonNegZExt) {
13276 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13277 "Unexpected load type or opcode");
13278 for (SDNode *User : N0->uses()) {
13279 if (User->getOpcode() == ISD::SETCC) {
13280 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13282 ExtLoadType = ISD::SEXTLOAD;
13283 ExtOpc = ISD::SIGN_EXTEND;
13284 break;
13285 }
13286 }
13287 }
13288 }
13289
13290 // TODO: isFixedLengthVector() should be removed; any negative effects on
13291 // code generation should then be addressed by that target's implementation
13292 // of isVectorLoadExtDesirable().
13293 if ((LegalOperations || VT.isFixedLengthVector() ||
13294 !cast<LoadSDNode>(N0)->isSimple()) &&
13295 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13296 return {};
13297
13298 bool DoXform = true;
13300 if (!N0.hasOneUse())
13301 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13302 if (VT.isVector())
13303 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13304 if (!DoXform)
13305 return {};
13306
13307 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13308 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13309 LN0->getBasePtr(), N0.getValueType(),
13310 LN0->getMemOperand());
13311 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13312 // If the load value is used only by N, replace it via CombineTo N.
13313 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13314 Combiner.CombineTo(N, ExtLoad);
13315 if (NoReplaceTrunc) {
13316 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13317 Combiner.recursivelyDeleteUnusedNodes(LN0);
13318 } else {
13319 SDValue Trunc =
13320 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13321 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13322 }
13323 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13324}
13325
13326static SDValue
13328 bool LegalOperations, SDNode *N, SDValue N0,
13329 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13330 if (!N0.hasOneUse())
13331 return SDValue();
13332
13333 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13334 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13335 return SDValue();
13336
13337 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13338 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13339 return SDValue();
13340
13341 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13342 return SDValue();
13343
13344 SDLoc dl(Ld);
13345 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13346 SDValue NewLoad = DAG.getMaskedLoad(
13347 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13348 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13349 ExtLoadType, Ld->isExpandingLoad());
13350 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13351 return NewLoad;
13352}
13353
13354// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13356 const TargetLowering &TLI, EVT VT,
13357 SDValue N0,
13358 ISD::LoadExtType ExtLoadType) {
13359 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13360 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13361 return {};
13362 EVT MemoryVT = ALoad->getMemoryVT();
13363 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13364 return {};
13365 // Can't fold into ALoad if it is already extending differently.
13366 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13367 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13368 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13369 return {};
13370
13371 EVT OrigVT = ALoad->getValueType(0);
13372 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13373 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13374 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13375 ALoad->getBasePtr(), ALoad->getMemOperand()));
13376 NewALoad->setExtensionType(ExtLoadType);
13378 SDValue(ALoad, 0),
13379 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13380 // Update the chain uses.
13381 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13382 return SDValue(NewALoad, 0);
13383}
13384
13386 bool LegalOperations) {
13387 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13388 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13389
13390 SDValue SetCC = N->getOperand(0);
13391 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13392 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13393 return SDValue();
13394
13395 SDValue X = SetCC.getOperand(0);
13396 SDValue Ones = SetCC.getOperand(1);
13397 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13398 EVT VT = N->getValueType(0);
13399 EVT XVT = X.getValueType();
13400 // setge X, C is canonicalized to setgt, so we do not need to match that
13401 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13402 // not require the 'not' op.
13403 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13404 // Invert and smear/shift the sign bit:
13405 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13406 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
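// E.g. for i32: 'not X' flips the sign bit, so sra by 31 yields all-ones
// exactly when X > -1 (the sext of true) and srl by 31 yields 1 (the zext);
// both yield 0 when X is negative.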
13407 SDLoc DL(N);
13408 unsigned ShCt = VT.getSizeInBits() - 1;
13409 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13410 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13411 SDValue NotX = DAG.getNOT(DL, X, VT);
13412 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13413 auto ShiftOpcode =
13414 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13415 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13416 }
13417 }
13418 return SDValue();
13419}
13420
13421SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13422 SDValue N0 = N->getOperand(0);
13423 if (N0.getOpcode() != ISD::SETCC)
13424 return SDValue();
13425
13426 SDValue N00 = N0.getOperand(0);
13427 SDValue N01 = N0.getOperand(1);
13428 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13429 EVT VT = N->getValueType(0);
13430 EVT N00VT = N00.getValueType();
13431 SDLoc DL(N);
13432
13433 // Propagate fast-math-flags.
13434 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13435
13436 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13437 // the same size as the compared operands. Try to optimize sext(setcc())
13438 // if this is the case.
13439 if (VT.isVector() && !LegalOperations &&
13440 TLI.getBooleanContents(N00VT) ==
13442 EVT SVT = getSetCCResultType(N00VT);
13443
13444 // If we already have the desired type, don't change it.
13445 if (SVT != N0.getValueType()) {
13446 // We know that the # elements of the results is the same as the
13447 // # elements of the compare (and the # elements of the compare result
13448 // for that matter). Check to see that they are the same size. If so,
13449 // we know that the element size of the sext'd result matches the
13450 // element size of the compare operands.
13451 if (VT.getSizeInBits() == SVT.getSizeInBits())
13452 return DAG.getSetCC(DL, VT, N00, N01, CC);
13453
13454 // If the desired elements are smaller or larger than the source
13455 // elements, we can use a matching integer vector type and then
13456 // truncate/sign extend.
13457 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13458 if (SVT == MatchingVecType) {
13459 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13460 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13461 }
13462 }
13463
13464 // Try to eliminate the sext of a setcc by zexting the compare operands.
13465 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13467 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13468 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13469 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13470
13471 // We have an unsupported narrow vector compare op that would be legal
13472 // if extended to the destination type. See if the compare operands
13473 // can be freely extended to the destination type.
13474 auto IsFreeToExtend = [&](SDValue V) {
13475 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13476 return true;
13477 // Match a simple, non-extended load that can be converted to a
13478 // legal {z/s}ext-load.
13479 // TODO: Allow widening of an existing {z/s}ext-load?
13480 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13481 ISD::isUNINDEXEDLoad(V.getNode()) &&
13482 cast<LoadSDNode>(V)->isSimple() &&
13483 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13484 return false;
13485
13486 // Non-chain users of this value must either be the setcc in this
13487 // sequence or extends that can be folded into the new {z/s}ext-load.
13488 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13489 UI != UE; ++UI) {
13490 // Skip uses of the chain and the setcc.
13491 SDNode *User = *UI;
13492 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13493 continue;
13494 // Extra users must have exactly the same cast we are about to create.
13495 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13496 // is enhanced similarly.
13497 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13498 return false;
13499 }
13500 return true;
13501 };
13502
13503 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13504 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13505 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13506 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13507 }
13508 }
13509 }
13510
13511 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13512 // Here, T can be 1 or -1, depending on the type of the setcc and
13513 // getBooleanContents().
13514 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13515
13516 // To determine the "true" side of the select, we need to know the high bit
13517 // of the value returned by the setcc if it evaluates to true.
13518 // If the type of the setcc is i1, then the true case of the select is just
13519 // sext(i1 1), that is, -1.
13520 // If the type of the setcc is larger (say, i8) then the value of the high
13521 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13522 // of the appropriate width.
13523 SDValue ExtTrueVal = (SetCCWidth == 1)
13524 ? DAG.getAllOnesConstant(DL, VT)
13525 : DAG.getBoolConstant(true, DL, VT, N00VT);
13526 SDValue Zero = DAG.getConstant(0, DL, VT);
13527 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13528 return SCC;
13529
13530 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13531 EVT SetCCVT = getSetCCResultType(N00VT);
13532 // Don't do this transform for i1 because there's a select transform
13533 // that would reverse it.
13534 // TODO: We should not do this transform at all without a target hook
13535 // because a sext is likely cheaper than a select?
13536 if (SetCCVT.getScalarSizeInBits() != 1 &&
13537 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13538 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13539 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13540 }
13541 }
13542
13543 return SDValue();
13544}
13545
13546SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13547 SDValue N0 = N->getOperand(0);
13548 EVT VT = N->getValueType(0);
13549 SDLoc DL(N);
13550
13551 if (VT.isVector())
13552 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13553 return FoldedVOp;
13554
13555 // sext(undef) = 0 because the top bits will all be the same.
13556 if (N0.isUndef())
13557 return DAG.getConstant(0, DL, VT);
13558
13559 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13560 return Res;
13561
13562 // fold (sext (sext x)) -> (sext x)
13563 // fold (sext (aext x)) -> (sext x)
13564 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13565 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13566
13567 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13568 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13572 N0.getOperand(0));
13573
13574 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13575 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13576 SDValue N00 = N0.getOperand(0);
13577 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13578 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13579 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13580 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13581 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13582 }
13583 }
13584
13585 if (N0.getOpcode() == ISD::TRUNCATE) {
13586 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13587 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13588 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13589 SDNode *oye = N0.getOperand(0).getNode();
13590 if (NarrowLoad.getNode() != N0.getNode()) {
13591 CombineTo(N0.getNode(), NarrowLoad);
13592 // CombineTo deleted the truncate, if needed, but not what's under it.
13593 AddToWorklist(oye);
13594 }
13595 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13596 }
13597
13598 // See if the value being truncated is already sign extended. If so, just
13599 // eliminate the trunc/sext pair.
13600 SDValue Op = N0.getOperand(0);
13601 unsigned OpBits = Op.getScalarValueSizeInBits();
13602 unsigned MidBits = N0.getScalarValueSizeInBits();
13603 unsigned DestBits = VT.getScalarSizeInBits();
13604 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13605
13606 if (OpBits == DestBits) {
13607 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13608 // bits, it is already ready.
13609 if (NumSignBits > DestBits-MidBits)
13610 return Op;
13611 } else if (OpBits < DestBits) {
13612 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13613 // bits, just sext from i32.
13614 if (NumSignBits > OpBits-MidBits)
13615 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13616 } else {
13617 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13618 // bits, just truncate to i32.
13619 if (NumSignBits > OpBits-MidBits)
13620 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13621 }
13622
13623 // fold (sext (truncate x)) -> (sextinreg x).
13624 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13625 N0.getValueType())) {
13626 if (OpBits < DestBits)
13627 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13628 else if (OpBits > DestBits)
13629 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13630 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13631 DAG.getValueType(N0.getValueType()));
13632 }
13633 }
13634
13635 // Try to simplify (sext (load x)).
13636 if (SDValue foldedExt =
13637 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13639 return foldedExt;
13640
13641 if (SDValue foldedExt =
13642 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13644 return foldedExt;
13645
13646 // fold (sext (load x)) to multiple smaller sextloads.
13647 // Only on illegal but splittable vectors.
13648 if (SDValue ExtLoad = CombineExtLoad(N))
13649 return ExtLoad;
13650
13651 // Try to simplify (sext (sextload x)).
13652 if (SDValue foldedExt = tryToFoldExtOfExtload(
13653 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13654 return foldedExt;
13655
13656 // Try to simplify (sext (atomic_load x)).
13657 if (SDValue foldedExt =
13658 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13659 return foldedExt;
13660
13661 // fold (sext (and/or/xor (load x), cst)) ->
13662 // (and/or/xor (sextload x), (sext cst))
13663 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13664 isa<LoadSDNode>(N0.getOperand(0)) &&
13665 N0.getOperand(1).getOpcode() == ISD::Constant &&
13666 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13667 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13668 EVT MemVT = LN00->getMemoryVT();
13669 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13670 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13672 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13673 ISD::SIGN_EXTEND, SetCCs, TLI);
13674 if (DoXform) {
13675 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13676 LN00->getChain(), LN00->getBasePtr(),
13677 LN00->getMemoryVT(),
13678 LN00->getMemOperand());
13680 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13681 ExtLoad, DAG.getConstant(Mask, DL, VT));
13682 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13683 bool NoReplaceTruncAnd = !N0.hasOneUse();
13684 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13685 CombineTo(N, And);
13686 // If N0 has multiple uses, change other uses as well.
13687 if (NoReplaceTruncAnd) {
13688 SDValue TruncAnd =
13690 CombineTo(N0.getNode(), TruncAnd);
13691 }
13692 if (NoReplaceTrunc) {
13693 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13694 } else {
13695 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13696 LN00->getValueType(0), ExtLoad);
13697 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13698 }
13699 return SDValue(N,0); // Return N so it doesn't get rechecked!
13700 }
13701 }
13702 }
13703
13704 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13705 return V;
13706
13707 if (SDValue V = foldSextSetcc(N))
13708 return V;
13709
13710 // fold (sext x) -> (zext x) if the sign bit is known zero.
13711 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13712 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13713 DAG.SignBitIsZero(N0)) {
13715 Flags.setNonNeg(true);
13716 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13717 }
13718
13719 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13720 return NewVSel;
13721
13722 // Eliminate this sign extend by doing a negation in the destination type:
13723 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13724 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13728 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13729 return DAG.getNegative(Zext, DL, VT);
13730 }
13731 // Eliminate this sign extend by doing a decrement in the destination type:
13732 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13733 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13737 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13738 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13739 }
13740
13741 // fold sext (not i1 X) -> add (zext i1 X), -1
13742 // TODO: This could be extended to handle bool vectors.
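// (not X) is true exactly when X is 0, so the sext is -1 for X == 0 and 0
// for X == 1; (zext X) - 1 produces the same two values.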
13743 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13744 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13745 TLI.isOperationLegal(ISD::ADD, VT)))) {
13746 // If we can eliminate the 'not', the sext form should be better
13747 if (SDValue NewXor = visitXOR(N0.getNode())) {
13748 // Returning N0 is a form of in-visit replacement that may have
13749 // invalidated N0.
13750 if (NewXor.getNode() == N0.getNode()) {
13751 // Return SDValue here as the xor should have already been replaced in
13752 // this sext.
13753 return SDValue();
13754 }
13755
13756 // Return a new sext with the new xor.
13757 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13758 }
13759
13760 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13761 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13762 }
13763
13764 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13765 return Res;
13766
13767 return SDValue();
13768}
13769
13770/// Given an extending node with a pop-count operand, if the target does not
13771/// support a pop-count in the narrow source type but does support it in the
13772/// destination type, widen the pop-count to the destination type.
13773static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13774 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13775 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13776
13777 SDValue CtPop = Extend->getOperand(0);
13778 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13779 return SDValue();
13780
13781 EVT VT = Extend->getValueType(0);
13782 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13783 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13784 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13785 return SDValue();
13786
13787 // zext (ctpop X) --> ctpop (zext X)
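// This is safe because zero-extending the operand cannot introduce set bits,
// so the population count is unchanged; e.g. on a hypothetical target where
// i16 CTPOP is illegal but i32 CTPOP is legal, the ctpop is simply widened.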
13788 SDLoc DL(Extend);
13789 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13790 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13791}
13792
13793// If we have (zext (abs X)) where X is a type that will be promoted by type
13794// legalization, convert to (abs (sext X)). But don't extend past a legal type.
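// Sign-extending the abs operand is what keeps the value unchanged: even for
// the minimum signed narrow value, whose abs wraps to itself, zero-extending
// that wrapped result equals abs of the sign-extended input.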
13795static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13796 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13797
13798 EVT VT = Extend->getValueType(0);
13799 if (VT.isVector())
13800 return SDValue();
13801
13802 SDValue Abs = Extend->getOperand(0);
13803 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13804 return SDValue();
13805
13806 EVT AbsVT = Abs.getValueType();
13807 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13808 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13809 TargetLowering::TypePromoteInteger)
13810 return SDValue();
13811
13812 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13813
13814 SDValue SExt =
13815 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13816 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13817 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13818}
13819
13820SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13821 SDValue N0 = N->getOperand(0);
13822 EVT VT = N->getValueType(0);
13823 SDLoc DL(N);
13824
13825 if (VT.isVector())
13826 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13827 return FoldedVOp;
13828
13829 // zext(undef) = 0
13830 if (N0.isUndef())
13831 return DAG.getConstant(0, DL, VT);
13832
13833 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13834 return Res;
13835
13836 // fold (zext (zext x)) -> (zext x)
13837 // fold (zext (aext x)) -> (zext x)
13838 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13839 SDNodeFlags Flags;
13840 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13841 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13842 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13843 }
13844
13845 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13846 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13847 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13848 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
13849 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT,
13850 N0.getOperand(0));
13851
13852 // fold (zext (truncate x)) -> (zext x) or
13853 // (zext (truncate x)) -> (truncate x)
13854 // This is valid when the truncated bits of x are already zero.
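// TruncatedBits below are the bits of Op that the truncate discarded but that
// the zero-extended result still covers; the fold is only safe when those
// bits are already known to be zero in Op.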
13855 SDValue Op;
13856 KnownBits Known;
13857 if (isTruncateOf(DAG, N0, Op, Known)) {
13858 APInt TruncatedBits =
13859 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13860 APInt(Op.getScalarValueSizeInBits(), 0) :
13861 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13862 N0.getScalarValueSizeInBits(),
13863 std::min(Op.getScalarValueSizeInBits(),
13864 VT.getScalarSizeInBits()));
13865 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13866 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13867 DAG.salvageDebugInfo(*N0.getNode());
13868
13869 return ZExtOrTrunc;
13870 }
13871 }
13872
13873 // fold (zext (truncate x)) -> (and x, mask)
13874 if (N0.getOpcode() == ISD::TRUNCATE) {
13875 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13876 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13877 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13878 SDNode *oye = N0.getOperand(0).getNode();
13879 if (NarrowLoad.getNode() != N0.getNode()) {
13880 CombineTo(N0.getNode(), NarrowLoad);
13881 // CombineTo deleted the truncate, if needed, but not what's under it.
13882 AddToWorklist(oye);
13883 }
13884 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13885 }
13886
13887 EVT SrcVT = N0.getOperand(0).getValueType();
13888 EVT MinVT = N0.getValueType();
13889
13890 if (N->getFlags().hasNonNeg()) {
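// The nneg flag guarantees the truncated value is non-negative, so this zext
// behaves like a sext; the truncate+extend pair can therefore collapse
// whenever Op carries enough sign bits, handled case by case below.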
13891 SDValue Op = N0.getOperand(0);
13892 unsigned OpBits = SrcVT.getScalarSizeInBits();
13893 unsigned MidBits = MinVT.getScalarSizeInBits();
13894 unsigned DestBits = VT.getScalarSizeInBits();
13895 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13896
13897 if (OpBits == DestBits) {
13898 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13899 // bits, the truncate and extend leave it unchanged; return it directly.
13900 if (NumSignBits > DestBits - MidBits)
13901 return Op;
13902 } else if (OpBits < DestBits) {
13903 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13904 // bits, just sext from i32.
13905 // FIXME: This can probably be ZERO_EXTEND nneg?
13906 if (NumSignBits > OpBits - MidBits)
13907 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13908 } else {
13909 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13910 // bits, just truncate to i32.
13911 if (NumSignBits > OpBits - MidBits)
13912 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13913 }
13914 }
13915
13916 // Try to mask before the extension to avoid having to generate a larger mask,
13917 // possibly over several sub-vectors.
13918 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13919 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13920 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13921 SDValue Op = N0.getOperand(0);
13922 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13923 AddToWorklist(Op.getNode());
13924 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13925 // Transfer the debug info; the new node is equivalent to N0.
13926 DAG.transferDbgValues(N0, ZExtOrTrunc);
13927 return ZExtOrTrunc;
13928 }
13929 }
13930
13931 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13932 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13933 AddToWorklist(Op.getNode());
13934 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13935 // We may safely transfer the debug info describing the truncate node over
13936 // to the equivalent and operation.
13937 DAG.transferDbgValues(N0, And);
13938 return And;
13939 }
13940 }
13941
13942 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13943 // if either of the casts is not free.
13944 if (N0.getOpcode() == ISD::AND &&
13945 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13946 N0.getOperand(1).getOpcode() == ISD::Constant &&
13947 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13948 !TLI.isZExtFree(N0.getValueType(), VT))) {
13949 SDValue X = N0.getOperand(0).getOperand(0);
13950 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13951 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13952 return DAG.getNode(ISD::AND, DL, VT,
13953 X, DAG.getConstant(Mask, DL, VT));
13954 }
13955
13956 // Try to simplify (zext (load x)).
13957 if (SDValue foldedExt = tryToFoldExtOfLoad(
13958 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13959 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13960 return foldedExt;
13961
13962 if (SDValue foldedExt =
13963 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13964 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13965 return foldedExt;
13966
13967 // fold (zext (load x)) to multiple smaller zextloads.
13968 // Only on illegal but splittable vectors.
13969 if (SDValue ExtLoad = CombineExtLoad(N))
13970 return ExtLoad;
13971
13972 // Try to simplify (zext (atomic_load x)).
13973 if (SDValue foldedExt =
13974 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13975 return foldedExt;
13976
13977 // fold (zext (and/or/xor (load x), cst)) ->
13978 // (and/or/xor (zextload x), (zext cst))
13979 // Unless (and (load x) cst) will match as a zextload already and has
13980 // additional users, or the zext is already free.
13981 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13982 isa<LoadSDNode>(N0.getOperand(0)) &&
13983 N0.getOperand(1).getOpcode() == ISD::Constant &&
13984 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13985 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13986 EVT MemVT = LN00->getMemoryVT();
13987 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13988 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13989 bool DoXform = true;
13990 SmallVector<SDNode *, 4> SetCCs;
13991 if (!N0.hasOneUse()) {
13992 if (N0.getOpcode() == ISD::AND) {
13993 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13994 EVT LoadResultTy = AndC->getValueType(0);
13995 EVT ExtVT;
13996 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13997 DoXform = false;
13998 }
13999 }
14000 if (DoXform)
14001 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14002 ISD::ZERO_EXTEND, SetCCs, TLI);
14003 if (DoXform) {
14004 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14005 LN00->getChain(), LN00->getBasePtr(),
14006 LN00->getMemoryVT(),
14007 LN00->getMemOperand());
14008 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14009 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14010 ExtLoad, DAG.getConstant(Mask, DL, VT));
14011 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14012 bool NoReplaceTruncAnd = !N0.hasOneUse();
14013 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14014 CombineTo(N, And);
14015 // If N0 has multiple uses, change other uses as well.
14016 if (NoReplaceTruncAnd) {
14017 SDValue TruncAnd =
14018 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14019 CombineTo(N0.getNode(), TruncAnd);
14020 }
14021 if (NoReplaceTrunc) {
14022 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14023 } else {
14024 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14025 LN00->getValueType(0), ExtLoad);
14026 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14027 }
14028 return SDValue(N,0); // Return N so it doesn't get rechecked!
14029 }
14030 }
14031 }
14032
14033 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14034 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14035 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14036 return ZExtLoad;
14037
14038 // Try to simplify (zext (zextload x)).
14039 if (SDValue foldedExt = tryToFoldExtOfExtload(
14040 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14041 return foldedExt;
14042
14043 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14044 return V;
14045
14046 if (N0.getOpcode() == ISD::SETCC) {
14047 // Propagate fast-math-flags.
14048 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14049
14050 // Only do this before legalize for now.
14051 if (!LegalOperations && VT.isVector() &&
14052 N0.getValueType().getVectorElementType() == MVT::i1) {
14053 EVT N00VT = N0.getOperand(0).getValueType();
14054 if (getSetCCResultType(N00VT) == N0.getValueType())
14055 return SDValue();
14056
14057 // We know that the # of elements of the result is the same as the #
14058 // elements of the compare (and the # elements of the compare result for
14059 // that matter). Check to see that they are the same size. If so, we know
14060 // that the element size of the extended result matches the element size of
14061 // the compare operands.
14062 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14063 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14064 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14065 N0.getOperand(1), N0.getOperand(2));
14066 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14067 }
14068
14069 // If the desired elements are smaller or larger than the source
14070 // elements we can use a matching integer vector type and then
14071 // truncate/any extend followed by zext_in_reg.
14072 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14073 SDValue VsetCC =
14074 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14075 N0.getOperand(1), N0.getOperand(2));
14076 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14077 N0.getValueType());
14078 }
14079
14080 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14081 EVT N0VT = N0.getValueType();
14082 EVT N00VT = N0.getOperand(0).getValueType();
14083 if (SDValue SCC = SimplifySelectCC(
14084 DL, N0.getOperand(0), N0.getOperand(1),
14085 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14086 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14087 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14088 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14089 }
14090
14091 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14092 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14093 !TLI.isZExtFree(N0, VT)) {
14094 SDValue ShVal = N0.getOperand(0);
14095 SDValue ShAmt = N0.getOperand(1);
14096 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14097 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14098 if (N0.getOpcode() == ISD::SHL) {
14099 // If the original shl may be shifting out bits, do not perform this
14100 // transformation.
14101 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14102 ShVal.getOperand(0).getValueSizeInBits();
14103 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14104 // If the shift is too large, then see if we can deduce that the
14105 // shift is safe anyway.
14106 // Create a mask that has ones for the bits being shifted out.
14107 APInt ShiftOutMask =
14108 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
14109 ShAmtC->getAPIntValue().getZExtValue());
14110
14111 // Check if the bits being shifted out are known to be zero.
14112 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
14113 return SDValue();
14114 }
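// At this point either the shift amount fits within the zero bits produced
// by the inner zext, or the shifted-out bits are provably zero, so widening
// the shift cannot change the computed value.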
14115 }
14116
14117 // Ensure that the shift amount is wide enough for the shifted value.
14118 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14119 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14120
14121 return DAG.getNode(N0.getOpcode(), DL, VT,
14122 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14123 }
14124 }
14125 }
14126
14127 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14128 return NewVSel;
14129
14130 if (SDValue NewCtPop = widenCtPop(N, DAG))
14131 return NewCtPop;
14132
14133 if (SDValue V = widenAbs(N, DAG))
14134 return V;
14135
14136 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14137 return Res;
14138
14139 // CSE zext nneg with sext if the zext is not free.
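// A zext with the nneg flag yields the same value as a sext of the same
// operand, so if an equivalent sign_extend node already exists we can reuse
// it instead of keeping two copies of the same computation alive.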
14140 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14141 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14142 if (CSENode)
14143 return SDValue(CSENode, 0);
14144 }
14145
14146 return SDValue();
14147}
14148
14149SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14150 SDValue N0 = N->getOperand(0);
14151 EVT VT = N->getValueType(0);
14152 SDLoc DL(N);
14153
14154 // aext(undef) = undef
14155 if (N0.isUndef())
14156 return DAG.getUNDEF(VT);
14157
14158 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14159 return Res;
14160
14161 // fold (aext (aext x)) -> (aext x)
14162 // fold (aext (zext x)) -> (zext x)
14163 // fold (aext (sext x)) -> (sext x)
14164 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14165 N0.getOpcode() == ISD::SIGN_EXTEND) {
14166 SDNodeFlags Flags;
14167 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14168 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14169 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14170 }
14171
14172 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14173 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14174 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14175 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14176 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14177 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14178 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14179
14180 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14181 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14182 if (N0.getOpcode() == ISD::TRUNCATE) {
14183 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14184 SDNode *oye = N0.getOperand(0).getNode();
14185 if (NarrowLoad.getNode() != N0.getNode()) {
14186 CombineTo(N0.getNode(), NarrowLoad);
14187 // CombineTo deleted the truncate, if needed, but not what's under it.
14188 AddToWorklist(oye);
14189 }
14190 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14191 }
14192 }
14193
14194 // fold (aext (truncate x))
14195 if (N0.getOpcode() == ISD::TRUNCATE)
14196 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14197
14198 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14199 // if the trunc is not free.
14200 if (N0.getOpcode() == ISD::AND &&
14201 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14202 N0.getOperand(1).getOpcode() == ISD::Constant &&
14203 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14204 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14205 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14206 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14207 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14208 }
14209
14210 // fold (aext (load x)) -> (aext (truncate (extload x)))
14211 // None of the supported targets knows how to perform load and any_ext
14212 // on vectors in one instruction, so attempt to fold to zext instead.
14213 if (VT.isVector()) {
14214 // Try to simplify (zext (load x)).
14215 if (SDValue foldedExt =
14216 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14217 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14218 return foldedExt;
14219 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14220 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14221 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14222 bool DoXform = true;
14223 SmallVector<SDNode *, 4> SetCCs;
14224 if (!N0.hasOneUse())
14225 DoXform =
14226 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14227 if (DoXform) {
14228 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14229 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14230 LN0->getBasePtr(), N0.getValueType(),
14231 LN0->getMemOperand());
14232 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14233 // If the load value is used only by N, replace it via CombineTo N.
14234 bool NoReplaceTrunc = N0.hasOneUse();
14235 CombineTo(N, ExtLoad);
14236 if (NoReplaceTrunc) {
14237 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14238 recursivelyDeleteUnusedNodes(LN0);
14239 } else {
14240 SDValue Trunc =
14241 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14242 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14243 }
14244 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14245 }
14246 }
14247
14248 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14249 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14250 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14251 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14252 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14253 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14254 ISD::LoadExtType ExtType = LN0->getExtensionType();
14255 EVT MemVT = LN0->getMemoryVT();
14256 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14257 SDValue ExtLoad =
14258 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14259 MemVT, LN0->getMemOperand());
14260 CombineTo(N, ExtLoad);
14261 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14262 recursivelyDeleteUnusedNodes(LN0);
14263 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14264 }
14265 }
14266
14267 if (N0.getOpcode() == ISD::SETCC) {
14268 // Propagate fast-math-flags.
14269 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14270
14271 // For vectors:
14272 // aext(setcc) -> vsetcc
14273 // aext(setcc) -> truncate(vsetcc)
14274 // aext(setcc) -> aext(vsetcc)
14275 // Only do this before legalize for now.
14276 if (VT.isVector() && !LegalOperations) {
14277 EVT N00VT = N0.getOperand(0).getValueType();
14278 if (getSetCCResultType(N00VT) == N0.getValueType())
14279 return SDValue();
14280
14281 // We know that the # of elements of the result is the same as the
14282 // # elements of the compare (and the # elements of the compare result
14283 // for that matter). Check to see that they are the same size. If so,
14284 // we know that the element size of the extended result matches the
14285 // element size of the compare operands.
14286 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14287 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14288 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14289
14290 // If the desired elements are smaller or larger than the source
14291 // elements we can use a matching integer vector type and then
14292 // truncate/any extend
14293 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14294 SDValue VsetCC = DAG.getSetCC(
14295 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14296 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14297 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14298 }
14299
14300 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14301 if (SDValue SCC = SimplifySelectCC(
14302 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14303 DAG.getConstant(0, DL, VT),
14304 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14305 return SCC;
14306 }
14307
14308 if (SDValue NewCtPop = widenCtPop(N, DAG))
14309 return NewCtPop;
14310
14311 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14312 return Res;
14313
14314 return SDValue();
14315}
14316
14317SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14318 unsigned Opcode = N->getOpcode();
14319 SDValue N0 = N->getOperand(0);
14320 SDValue N1 = N->getOperand(1);
14321 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14322
14323 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14324 if (N0.getOpcode() == Opcode &&
14325 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14326 return N0;
14327
14328 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14329 N0.getOperand(0).getOpcode() == Opcode) {
14330 // We have an assert, truncate, assert sandwich. Make one stronger assert
14331 // by asserting the smaller of the two asserted types on the larger source value.
14332 // This eliminates the later assert:
14333 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14334 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14335 SDLoc DL(N);
14336 SDValue BigA = N0.getOperand(0);
14337 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14338 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14339 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14340 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14341 BigA.getOperand(0), MinAssertVTVal);
14342 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14343 }
14344
14345 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14346 // than X, just move the AssertZext in front of the truncate and drop the
14347 // AssertSExt.
14348 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14349 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14350 Opcode == ISD::AssertZext) {
14351 SDValue BigA = N0.getOperand(0);
14352 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14353 if (AssertVT.bitsLT(BigA_AssertVT)) {
14354 SDLoc DL(N);
14355 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14356 BigA.getOperand(0), N1);
14357 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14358 }
14359 }
14360
14361 return SDValue();
14362}
14363
14364SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14365 SDLoc DL(N);
14366
14367 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14368 SDValue N0 = N->getOperand(0);
14369
14370 // Fold (assertalign (assertalign x, AL0), AL1) ->
14371 // (assertalign x, max(AL0, AL1))
14372 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14373 return DAG.getAssertAlign(DL, N0.getOperand(0),
14374 std::max(AL, AAN->getAlign()));
14375
14376 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14377 // this assert down to the source operands so that those arithmetic ops can be
14378 // exposed to DAG combining.
14379 switch (N0.getOpcode()) {
14380 default:
14381 break;
14382 case ISD::ADD:
14383 case ISD::SUB: {
14384 unsigned AlignShift = Log2(AL);
14385 SDValue LHS = N0.getOperand(0);
14386 SDValue RHS = N0.getOperand(1);
14387 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14388 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
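// If one operand is already known to have the asserted alignment, the other
// operand must have it as well for the sum/difference to be aligned, so the
// assertion can be attached to whichever operand is not yet known-aligned.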
14389 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14390 if (LHSAlignShift < AlignShift)
14391 LHS = DAG.getAssertAlign(DL, LHS, AL);
14392 if (RHSAlignShift < AlignShift)
14393 RHS = DAG.getAssertAlign(DL, RHS, AL);
14394 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14395 }
14396 break;
14397 }
14398 }
14399
14400 return SDValue();
14401}
14402
14403/// If the result of a load is shifted/masked/truncated to an effectively
14404/// narrower type, try to transform the load to a narrower type and/or
14405/// use an extending load.
14406SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14407 unsigned Opc = N->getOpcode();
14408
14409 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14410 SDValue N0 = N->getOperand(0);
14411 EVT VT = N->getValueType(0);
14412 EVT ExtVT = VT;
14413
14414 // This transformation isn't valid for vector loads.
14415 if (VT.isVector())
14416 return SDValue();
14417
14418 // The ShAmt variable is used to indicate that we've consumed a right
14419 // shift. I.e. we want to narrow the width of the load by skipping to load the
14420 // ShAmt least significant bits.
14421 unsigned ShAmt = 0;
14422 // A special case is when the least significant bits from the load are masked
14423 // away, but using an AND rather than a right shift. ShiftedOffset is used
14424 // to indicate that the narrowed load should be left-shifted that many bits
14425 // to get the result.
14426 unsigned ShiftedOffset = 0;
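// For example, an AND mask of 0xff00 is handled as an 8-bit load of the bits
// starting at bit 8 (ShAmt = 8, ActiveBits = 8); the narrowed result is then
// shifted left by ShiftedOffset to restore its original bit position.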
14427 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14428 // extended to VT.
14429 if (Opc == ISD::SIGN_EXTEND_INREG) {
14430 ExtType = ISD::SEXTLOAD;
14431 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14432 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14433 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14434 // value, or it may be shifting a higher subword, half or byte into the
14435 // lowest bits.
14436
14437 // Only handle shift with constant shift amount, and the shiftee must be a
14438 // load.
14439 auto *LN = dyn_cast<LoadSDNode>(N0);
14440 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14441 if (!N1C || !LN)
14442 return SDValue();
14443 // If the shift amount is larger than the memory type then we're not
14444 // accessing any of the loaded bytes.
14445 ShAmt = N1C->getZExtValue();
14446 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14447 if (MemoryWidth <= ShAmt)
14448 return SDValue();
14449 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14450 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14451 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14452 // If original load is a SEXTLOAD then we can't simply replace it by a
14453 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14454 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14455 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14456 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14457 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14458 LN->getExtensionType() != ExtType)
14459 return SDValue();
14460 } else if (Opc == ISD::AND) {
14461 // An AND with a constant mask is the same as a truncate + zero-extend.
14462 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14463 if (!AndC)
14464 return SDValue();
14465
14466 const APInt &Mask = AndC->getAPIntValue();
14467 unsigned ActiveBits = 0;
14468 if (Mask.isMask()) {
14469 ActiveBits = Mask.countr_one();
14470 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14471 ShiftedOffset = ShAmt;
14472 } else {
14473 return SDValue();
14474 }
14475
14476 ExtType = ISD::ZEXTLOAD;
14477 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14478 }
14479
14480 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14481 // a right shift. Here we redo some of those checks, to possibly adjust the
14482 // ExtVT even further based on "a masking AND". We could also end up here for
14483 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14484 // need to be done here as well.
14485 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14486 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14487 // Bail out when the SRL has more than one use. This is done for historical
14488 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14489 // check below? And maybe it could be non-profitable to do the transform in
14490 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14491 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14492 if (!SRL.hasOneUse())
14493 return SDValue();
14494
14495 // Only handle shift with constant shift amount, and the shiftee must be a
14496 // load.
14497 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14498 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14499 if (!SRL1C || !LN)
14500 return SDValue();
14501
14502 // If the shift amount is larger than the input type then we're not
14503 // accessing any of the loaded bytes. If the load was a zextload/extload
14504 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14505 ShAmt = SRL1C->getZExtValue();
14506 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14507 if (ShAmt >= MemoryWidth)
14508 return SDValue();
14509
14510 // Because a SRL must be assumed to *need* to zero-extend the high bits
14511 // (as opposed to anyext the high bits), we can't combine the zextload
14512 // lowering of SRL and an sextload.
14513 if (LN->getExtensionType() == ISD::SEXTLOAD)
14514 return SDValue();
14515
14516 // Avoid reading outside the memory accessed by the original load (could
14517 // happen if we only adjust the load base pointer by ShAmt). Instead we
14518 // try to narrow the load even further. The typical scenario here is:
14519 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14520 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14521 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14522 // Don't replace sextload by zextload.
14523 if (ExtType == ISD::SEXTLOAD)
14524 return SDValue();
14525 // Narrow the load.
14526 ExtType = ISD::ZEXTLOAD;
14527 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14528 }
14529
14530 // If the SRL is only used by a masking AND, we may be able to adjust
14531 // the ExtVT to make the AND redundant.
14532 SDNode *Mask = *(SRL->use_begin());
14533 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14534 isa<ConstantSDNode>(Mask->getOperand(1))) {
14535 unsigned Offset, ActiveBits;
14536 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14537 if (ShiftMask.isMask()) {
14538 EVT MaskedVT =
14539 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14540 // If the mask is smaller, recompute the type.
14541 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14542 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14543 ExtVT = MaskedVT;
14544 } else if (ExtType == ISD::ZEXTLOAD &&
14545 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14546 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14547 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14548 // If the mask is shifted we can use a narrower load and a shl to insert
14549 // the trailing zeros.
14550 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14551 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14552 ExtVT = MaskedVT;
14553 ShAmt = Offset + ShAmt;
14554 ShiftedOffset = Offset;
14555 }
14556 }
14557 }
14558
14559 N0 = SRL.getOperand(0);
14560 }
14561
14562 // If the load is shifted left (and the result isn't shifted back right), we
14563 // can fold a truncate through the shift. The typical scenario is that N
14564 // points at a TRUNCATE here so the attempted fold is:
14565 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14566 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14567 unsigned ShLeftAmt = 0;
14568 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14569 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14570 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14571 ShLeftAmt = N01->getZExtValue();
14572 N0 = N0.getOperand(0);
14573 }
14574 }
14575
14576 // If we haven't found a load, we can't narrow it.
14577 if (!isa<LoadSDNode>(N0))
14578 return SDValue();
14579
14580 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14581 // Reducing the width of a volatile load is illegal. For atomics, we may be
14582 // able to reduce the width provided we never widen again. (see D66309)
14583 if (!LN0->isSimple() ||
14584 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14585 return SDValue();
14586
14587 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14588 unsigned LVTStoreBits =
14589 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14590 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14591 return LVTStoreBits - EVTStoreBits - ShAmt;
14592 };
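// On big-endian targets the most significant bytes live at the lower
// addresses, so the byte offset of the narrowed value must be measured from
// the opposite end of the original memory access.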
14593
14594 // We need to adjust the pointer to the load by ShAmt bits in order to load
14595 // the correct bytes.
14596 unsigned PtrAdjustmentInBits =
14597 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14598
14599 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14600 SDLoc DL(LN0);
14601 // The original load itself didn't wrap, so an offset within it doesn't.
14602 SDNodeFlags Flags;
14603 Flags.setNoUnsignedWrap(true);
14604 SDValue NewPtr = DAG.getMemBasePlusOffset(
14605 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14606 AddToWorklist(NewPtr.getNode());
14607
14608 SDValue Load;
14609 if (ExtType == ISD::NON_EXTLOAD)
14610 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14611 LN0->getPointerInfo().getWithOffset(PtrOff),
14612 LN0->getOriginalAlign(),
14613 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14614 else
14615 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14616 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14617 LN0->getOriginalAlign(),
14618 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14619
14620 // Replace the old load's chain with the new load's chain.
14621 WorklistRemover DeadNodes(*this);
14622 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14623
14624 // Shift the result left, if we've swallowed a left shift.
14625 SDValue Result = Load;
14626 if (ShLeftAmt != 0) {
14627 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14628 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14629 ShImmTy = VT;
14630 // If the shift amount is as large as the result size (but, presumably,
14631 // no larger than the source) then the useful bits of the result are
14632 // zero; we can't simply return the shortened shift, because the result
14633 // of that operation is undefined.
14634 if (ShLeftAmt >= VT.getScalarSizeInBits())
14635 Result = DAG.getConstant(0, DL, VT);
14636 else
14637 Result = DAG.getNode(ISD::SHL, DL, VT,
14638 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14639 }
14640
14641 if (ShiftedOffset != 0) {
14642 // We're using a shifted mask, so the load now has an offset. This means
14643 // that the data has been loaded into lower bytes than it would have been
14644 // before, so we need to shl the loaded data into the correct position in the
14645 // register.
14646 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14647 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14648 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14649 }
14650
14651 // Return the new loaded value.
14652 return Result;
14653}
14654
14655SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14656 SDValue N0 = N->getOperand(0);
14657 SDValue N1 = N->getOperand(1);
14658 EVT VT = N->getValueType(0);
14659 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14660 unsigned VTBits = VT.getScalarSizeInBits();
14661 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14662
14663 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14664 if (N0.isUndef())
14665 return DAG.getConstant(0, SDLoc(N), VT);
14666
14667 // fold (sext_in_reg c1) -> c1
14668 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14669 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14670
14671 // If the input is already sign extended, just drop the extension.
14672 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14673 return N0;
14674
14675 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14676 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14677 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14678 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14679 N1);
14680
14681 // fold (sext_in_reg (sext x)) -> (sext x)
14682 // fold (sext_in_reg (aext x)) -> (sext x)
14683 // if x is small enough or if we know that x has more than 1 sign bit and the
14684 // sign_extend_inreg is extending from one of them.
14685 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14686 SDValue N00 = N0.getOperand(0);
14687 unsigned N00Bits = N00.getScalarValueSizeInBits();
14688 if ((N00Bits <= ExtVTBits ||
14689 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14690 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14691 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14692 }
14693
14694 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14695 // if x is small enough or if we know that x has more than 1 sign bit and the
14696 // sign_extend_inreg is extending from one of them.
14697 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14698 SDValue N00 = N0.getOperand(0);
14699 unsigned N00Bits = N00.getScalarValueSizeInBits();
14700 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14701 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14702 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14703 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14704 if ((N00Bits == ExtVTBits ||
14705 (!IsZext && (N00Bits < ExtVTBits ||
14706 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14707 (!LegalOperations ||
14708 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14709 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14710 }
14711
14712 // fold (sext_in_reg (zext x)) -> (sext x)
14713 // iff we are extending the source sign bit.
14714 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14715 SDValue N00 = N0.getOperand(0);
14716 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14717 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14718 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14719 }
14720
14721 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14722 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14723 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14724
14725 // fold operands of sext_in_reg based on knowledge that the top bits are not
14726 // demanded.
14727 if (SimplifyDemandedBits(SDValue(N, 0)))
14728 return SDValue(N, 0);
14729
14730 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14731 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14732 if (SDValue NarrowLoad = reduceLoadWidth(N))
14733 return NarrowLoad;
14734
14735 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14736 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14737 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
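// The SRA is only equivalent when every bit above the extracted field is
// already a copy of its sign bit, i.e. the SRL source has enough known sign
// bits to cover the distance checked below.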
14738 if (N0.getOpcode() == ISD::SRL) {
14739 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14740 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14741 // We can turn this into an SRA iff the input to the SRL is already sign
14742 // extended enough.
14743 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14744 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14745 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14746 N0.getOperand(1));
14747 }
14748 }
14749
14750 // fold (sext_inreg (extload x)) -> (sextload x)
14751 // If sextload is not supported by target, we can only do the combine when
14752 // load has one use. Doing otherwise can block folding the extload with other
14753 // extends that the target does support.
14754 if (ISD::isEXTLoad(N0.getNode()) &&
14755 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14756 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14757 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14758 N0.hasOneUse()) ||
14759 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14760 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14761 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14762 LN0->getChain(),
14763 LN0->getBasePtr(), ExtVT,
14764 LN0->getMemOperand());
14765 CombineTo(N, ExtLoad);
14766 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14767 AddToWorklist(ExtLoad.getNode());
14768 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14769 }
14770
14771 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14772 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14773 N0.hasOneUse() &&
14774 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14775 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14776 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14777 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14778 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14779 LN0->getChain(),
14780 LN0->getBasePtr(), ExtVT,
14781 LN0->getMemOperand());
14782 CombineTo(N, ExtLoad);
14783 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14784 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14785 }
14786
14787 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14788 // ignore it if the masked load is already sign extended
14789 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14790 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14791 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14792 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14793 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14794 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14795 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14796 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14797 CombineTo(N, ExtMaskedLoad);
14798 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14799 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14800 }
14801 }
14802
14803 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14804 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14805 if (SDValue(GN0, 0).hasOneUse() &&
14806 ExtVT == GN0->getMemoryVT() &&
14807 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
14808 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14809 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14810
14811 SDValue ExtLoad = DAG.getMaskedGather(
14812 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14813 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14814
14815 CombineTo(N, ExtLoad);
14816 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14817 AddToWorklist(ExtLoad.getNode());
14818 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14819 }
14820 }
14821
14822 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14823 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14824 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14825 N0.getOperand(1), false))
14826 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14827 }
14828
14829 // Fold (iM_signext_inreg
14830 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14831 // from iN)
14832 // -> (extract_subvector (signext iN_v to iM))
14833 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14834 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14835 SDValue InnerExt = N0.getOperand(0);
14836 EVT InnerExtVT = InnerExt->getValueType(0);
14837 SDValue Extendee = InnerExt->getOperand(0);
14838
14839 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14840 (!LegalOperations ||
14841 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14842 SDValue SignExtExtendee =
14843 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14844 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14845 N0.getOperand(1));
14846 }
14847 }
14848
14849 return SDValue();
14850}
14851
14852 static SDValue foldExtendVectorInregToExtendOfSubvector(
14853 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14854 bool LegalOperations) {
14855 unsigned InregOpcode = N->getOpcode();
14856 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14857
14858 SDValue Src = N->getOperand(0);
14859 EVT VT = N->getValueType(0);
14860 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14861 Src.getValueType().getVectorElementType(),
14862 VT.getVectorElementCount());
14863
14864 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14865 "Expected EXTEND_VECTOR_INREG dag node in input!");
14866
14867 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14868 // FIXME: one-use check may be overly restrictive
14869 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14870 return SDValue();
14871
14872 // Profitability check: we must be extending exactly one of its operands.
14873 // FIXME: this is probably overly restrictive.
14874 Src = Src.getOperand(0);
14875 if (Src.getValueType() != SrcVT)
14876 return SDValue();
14877
14878 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14879 return SDValue();
14880
14881 return DAG.getNode(Opcode, DL, VT, Src);
14882}
14883
14884SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14885 SDValue N0 = N->getOperand(0);
14886 EVT VT = N->getValueType(0);
14887 SDLoc DL(N);
14888
14889 if (N0.isUndef()) {
14890 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14891 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14892 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14893 ? DAG.getUNDEF(VT)
14894 : DAG.getConstant(0, DL, VT);
14895 }
14896
14897 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14898 return Res;
14899
14900 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14901 return SDValue(N, 0);
14902
14903 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14904 LegalOperations))
14905 return R;
14906
14907 return SDValue();
14908}
14909
14910SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14911 SDValue N0 = N->getOperand(0);
14912 EVT VT = N->getValueType(0);
14913 EVT SrcVT = N0.getValueType();
14914 bool isLE = DAG.getDataLayout().isLittleEndian();
14915 SDLoc DL(N);
14916
14917 // trunc(undef) = undef
14918 if (N0.isUndef())
14919 return DAG.getUNDEF(VT);
14920
14921 // fold (truncate (truncate x)) -> (truncate x)
14922 if (N0.getOpcode() == ISD::TRUNCATE)
14923 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14924
14925 // fold (truncate c1) -> c1
14926 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14927 return C;
14928
14929 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14930 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14931 N0.getOpcode() == ISD::SIGN_EXTEND ||
14932 N0.getOpcode() == ISD::ANY_EXTEND) {
14933 // if the source is smaller than the dest, we still need an extend.
14934 if (N0.getOperand(0).getValueType().bitsLT(VT))
14935 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14936 // if the source is larger than the dest, then we just need the truncate.
14937 if (N0.getOperand(0).getValueType().bitsGT(VT))
14938 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14939 // if the source and dest are the same type, we can drop both the extend
14940 // and the truncate.
14941 return N0.getOperand(0);
14942 }
14943
14944 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14945 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14946 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14947 N0.hasOneUse()) {
14948 SDValue X = N0.getOperand(0);
14949 SDValue ExtVal = N0.getOperand(1);
14950 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14951 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14952 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14953 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14954 }
14955 }
14956
14957 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14958 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14959 return SDValue();
14960
14961 // Fold extract-and-trunc into a narrow extract. For example:
14962 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14963 // i32 y = TRUNCATE(i64 x)
14964 // -- becomes --
14965 // v16i8 b = BITCAST (v2i64 val)
14966 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14967 //
14968 // Note: We only run this optimization after type legalization (which often
14969 // creates this pattern) and before operation legalization after which
14970 // we need to be more careful about the vector instructions that we generate.
14971 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14972 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14973 EVT VecTy = N0.getOperand(0).getValueType();
14974 EVT ExTy = N0.getValueType();
14975 EVT TrTy = N->getValueType(0);
14976
14977 auto EltCnt = VecTy.getVectorElementCount();
14978 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14979 auto NewEltCnt = EltCnt * SizeRatio;
14980
14981 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14982 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14983
14984 SDValue EltNo = N0->getOperand(1);
14985 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14986 int Elt = EltNo->getAsZExtVal();
14987 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14988 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14989 DAG.getBitcast(NVT, N0.getOperand(0)),
14990 DAG.getVectorIdxConstant(Index, DL));
14991 }
14992 }
14993
14994 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14995 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14996 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14997 TLI.isTruncateFree(SrcVT, VT)) {
14998 SDLoc SL(N0);
14999 SDValue Cond = N0.getOperand(0);
15000 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15001 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15002 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15003 }
15004 }
15005
15006 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15007 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15008 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15009 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15010 SDValue Amt = N0.getOperand(1);
15011 KnownBits Known = DAG.computeKnownBits(Amt);
15012 unsigned Size = VT.getScalarSizeInBits();
15013 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15014 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15015 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15016 if (AmtVT != Amt.getValueType()) {
15017 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15018 AddToWorklist(Amt.getNode());
15019 }
15020 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15021 }
15022 }
15023
15024 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15025 return V;
15026
15027 if (SDValue ABD = foldABSToABD(N, DL))
15028 return ABD;
15029
15030 // Attempt to pre-truncate BUILD_VECTOR sources.
15031 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15032 N0.hasOneUse() &&
15033 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15034 // Avoid creating illegal types if running after type legalizer.
15035 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15036 EVT SVT = VT.getScalarType();
15037 SmallVector<SDValue, 8> TruncOps;
15038 for (const SDValue &Op : N0->op_values()) {
15039 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15040 TruncOps.push_back(TruncOp);
15041 }
15042 return DAG.getBuildVector(VT, DL, TruncOps);
15043 }
15044
15045 // trunc (splat_vector x) -> splat_vector (trunc x)
15046 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15047 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15048 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15049 EVT SVT = VT.getScalarType();
15050 return DAG.getSplatVector(
15051 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15052 }
15053
15054 // Fold a series of buildvector, bitcast, and truncate if possible.
15055 // For example fold
15056 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15057 // (2xi32 (buildvector x, y)).
15058 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15059 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15060 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15061 N0.getOperand(0).hasOneUse()) {
15062 SDValue BuildVect = N0.getOperand(0);
15063 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15064 EVT TruncVecEltTy = VT.getVectorElementType();
15065
15066 // Check that the element types match.
15067 if (BuildVectEltTy == TruncVecEltTy) {
15068 // Now we only need to compute the offset of the truncated elements.
15069 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15070 unsigned TruncVecNumElts = VT.getVectorNumElements();
15071 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15072
15073 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15074 "Invalid number of elements");
15075
15076 SmallVector<SDValue, 8> Opnds;
15077 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15078 Opnds.push_back(BuildVect.getOperand(i));
15079
15080 return DAG.getBuildVector(VT, DL, Opnds);
15081 }
15082 }
15083
15084 // fold (truncate (load x)) -> (smaller load x)
15085 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15086 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15087 if (SDValue Reduced = reduceLoadWidth(N))
15088 return Reduced;
15089
15090 // Handle the case where the truncated result is at least as wide as the
15091 // loaded type.
15092 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15093 auto *LN0 = cast<LoadSDNode>(N0);
15094 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15095 SDValue NewLoad = DAG.getExtLoad(
15096 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15097 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15098 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15099 return NewLoad;
15100 }
15101 }
15102 }
15103
15104 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15105 // where ... are all 'undef'.
15106 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15107 SmallVector<EVT, 8> VTs;
15108 SDValue V;
15109 unsigned Idx = 0;
15110 unsigned NumDefs = 0;
15111
15112 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15113 SDValue X = N0.getOperand(i);
15114 if (!X.isUndef()) {
15115 V = X;
15116 Idx = i;
15117 NumDefs++;
15118 }
15119 // Stop if more than one member is non-undef.
15120 if (NumDefs > 1)
15121 break;
15122
15123 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15124 X.getValueType().getVectorElementType(),
15125 X.getValueType().getVectorElementCount()));
15126 }
15127
15128 if (NumDefs == 0)
15129 return DAG.getUNDEF(VT);
15130
15131 if (NumDefs == 1) {
15132 assert(V.getNode() && "The single defined operand is empty!");
15133 SmallVector<SDValue, 8> Opnds;
15134 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15135 if (i != Idx) {
15136 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15137 continue;
15138 }
15139 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15140 AddToWorklist(NV.getNode());
15141 Opnds.push_back(NV);
15142 }
15143 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15144 }
15145 }
15146
15147 // Fold truncate of a bitcast of a vector to an extract of the low vector
15148 // element.
15149 //
15150 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15151 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15152 SDValue VecSrc = N0.getOperand(0);
15153 EVT VecSrcVT = VecSrc.getValueType();
15154 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15155 (!LegalOperations ||
15156 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15157 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15158 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15159 DAG.getVectorIdxConstant(Idx, DL));
15160 }
15161 }
15162
15163 // Simplify the operands using demanded-bits information.
15164 if (SimplifyDemandedBits(SDValue(N, 0)))
15165 return SDValue(N, 0);
15166
15167 // fold (truncate (extract_subvector(ext x))) ->
15168 // (extract_subvector x)
15169 // TODO: This can be generalized to cover cases where the truncate and extract
15170 // do not fully cancel each other out.
15171 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15172 SDValue N00 = N0.getOperand(0);
15173 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15174 N00.getOpcode() == ISD::ZERO_EXTEND ||
15175 N00.getOpcode() == ISD::ANY_EXTEND) {
15176 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15177 VT.getVectorElementType())
15178 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15179 N00.getOperand(0), N0.getOperand(1));
15180 }
15181 }
15182
15183 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15184 return NewVSel;
15185
15186 // Narrow a suitable binary operation with a non-opaque constant operand by
15187 // moving it ahead of the truncate. This is limited to pre-legalization
15188 // because targets may prefer a wider type during later combines and invert
15189 // this transform.
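// For example (illustrative): (i8 trunc (i32 add X, 5)) can become
// (i8 add (i8 trunc X), 5), because these ops commute with truncation
// modulo 2^8.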
15190 switch (N0.getOpcode()) {
15191 case ISD::ADD:
15192 case ISD::SUB:
15193 case ISD::MUL:
15194 case ISD::AND:
15195 case ISD::OR:
15196 case ISD::XOR:
15197 if (!LegalOperations && N0.hasOneUse() &&
15198 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15199 isConstantOrConstantVector(N0.getOperand(1), true))) {
15200 // TODO: We already restricted this to pre-legalization, but for vectors
15201 // we are extra cautious to not create an unsupported operation.
15202 // Target-specific changes are likely needed to avoid regressions here.
15203 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15204 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15205 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15206 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15207 }
15208 }
15209 break;
15210 case ISD::ADDE:
15211 case ISD::UADDO_CARRY:
15212 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15213 // (trunc uaddo_carry(X, Y, Carry)) ->
15214 // (uaddo_carry trunc(X), trunc(Y), Carry)
15215 // When the adde's carry is not used.
15216 // We only do this for uaddo_carry before operation legalization.
15217 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15218 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15219 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15220 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15221 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15222 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15223 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15224 }
15225 break;
15226 case ISD::USUBSAT:
15227 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15228 // enough to know that the upper bits are zero, we must ensure that we don't
15229 // introduce an extra truncate.
15230 if (!LegalOperations && N0.hasOneUse() &&
15231 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15232 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15233 VT.getScalarSizeInBits() &&
15234 hasOperation(N0.getOpcode(), VT)) {
15235 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15236 DAG, DL);
15237 }
15238 break;
15239 }
15240
15241 return SDValue();
15242}
15243
15244static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15245 SDValue Elt = N->getOperand(i);
15246 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15247 return Elt.getNode();
15248 return Elt.getOperand(Elt.getResNo()).getNode();
15249}
15250
15251/// build_pair (load, load) -> load
15252/// if load locations are consecutive.
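/// For example (illustrative, little-endian): (build_pair (i32 load p),
/// (i32 load p+4)) can become a single (i64 load p), provided the wide load
/// is legal, fast, and the two loads are simple and consecutive.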
15253SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15254 assert(N->getOpcode() == ISD::BUILD_PAIR);
15255
15256 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15257 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15258
15259 // A BUILD_PAIR always has the least significant part in elt 0 and the
15260 // most significant part in elt 1. So when combining into one large load, we
15261 // need to consider the endianness.
15262 if (DAG.getDataLayout().isBigEndian())
15263 std::swap(LD1, LD2);
15264
15265 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15266 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15267 LD1->getAddressSpace() != LD2->getAddressSpace())
15268 return SDValue();
15269
15270 unsigned LD1Fast = 0;
15271 EVT LD1VT = LD1->getValueType(0);
15272 unsigned LD1Bytes = LD1VT.getStoreSize();
15273 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15274 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15275 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15276 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15277 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15278 LD1->getPointerInfo(), LD1->getAlign());
15279
15280 return SDValue();
15281}
15282
15283static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15284 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15285 // and Lo parts; on big-endian machines it doesn't.
15286 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15287}
15288
15289SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15290 const TargetLowering &TLI) {
15291 // If this is not a bitcast to an FP type or if the target doesn't have
15292 // IEEE754-compliant FP logic, we're done.
15293 EVT VT = N->getValueType(0);
15294 SDValue N0 = N->getOperand(0);
15295 EVT SourceVT = N0.getValueType();
15296
15297 if (!VT.isFloatingPoint())
15298 return SDValue();
15299
15300 // TODO: Handle cases where the integer constant is a different scalar
15301 // bitwidth to the FP.
15302 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15303 return SDValue();
15304
15305 unsigned FPOpcode;
15306 APInt SignMask;
15307 switch (N0.getOpcode()) {
15308 case ISD::AND:
15309 FPOpcode = ISD::FABS;
15310 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15311 break;
15312 case ISD::XOR:
15313 FPOpcode = ISD::FNEG;
15314 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15315 break;
15316 case ISD::OR:
15317 FPOpcode = ISD::FABS;
15318 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15319 break;
15320 default:
15321 return SDValue();
15322 }
15323
15324 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15325 return SDValue();
15326
15327 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15328 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15329 // removing this would require more changes.
15330 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15331 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15332 return true;
15333
15334 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15335 };
15336
15337 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15338 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15339 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15340 // fneg (fabs X)
15341 SDValue LogicOp0 = N0.getOperand(0);
15342 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15343 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15344 IsBitCastOrFree(LogicOp0, VT)) {
15345 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15346 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15347 NumFPLogicOpsConv++;
15348 if (N0.getOpcode() == ISD::OR)
15349 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15350 return FPOp;
15351 }
15352
15353 return SDValue();
15354}
15355
15356SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15357 SDValue N0 = N->getOperand(0);
15358 EVT VT = N->getValueType(0);
15359
15360 if (N0.isUndef())
15361 return DAG.getUNDEF(VT);
15362
15363 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15364 // Only do this before legalize types, unless both types are integer and the
15365 // scalar type is legal. Only do this before legalize ops, since the target
15366 // may be depending on the bitcast.
15367 // First check to see if this is all constant.
15368 // TODO: Support FP bitcasts after legalize types.
15369 if (VT.isVector() &&
15370 (!LegalTypes ||
15371 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15372 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15373 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15374 cast<BuildVectorSDNode>(N0)->isConstant())
15375 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15376 VT.getVectorElementType());
15377 
15378 // If the input is a constant, let getNode fold it.
15379 if (isIntOrFPConstant(N0)) {
15380 // If we can't allow illegal operations, we need to check that this is just
15381 // an fp -> int or int -> fp conversion and that the resulting operation will
15382 // be legal.
15383 if (!LegalOperations ||
15384 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15385 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15386 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15387 TLI.isOperationLegal(ISD::Constant, VT))) {
15388 SDValue C = DAG.getBitcast(VT, N0);
15389 if (C.getNode() != N)
15390 return C;
15391 }
15392 }
15393
15394 // (conv (conv x, t1), t2) -> (conv x, t2)
15395 if (N0.getOpcode() == ISD::BITCAST)
15396 return DAG.getBitcast(VT, N0.getOperand(0));
15397
15398 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15399 // iff the current bitwise logicop type isn't legal
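// For example (illustrative): if v2i32 is not a legal type,
// (i64 bitcast (xor (v2i32 bitcast i64:X), v2i32 C)) can be rewritten as
// (i64 xor X, (i64 bitcast C)), avoiding the illegal vector operation.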
15400 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15401 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15402 auto IsFreeBitcast = [VT](SDValue V) {
15403 return (V.getOpcode() == ISD::BITCAST &&
15404 V.getOperand(0).getValueType() == VT) ||
15405 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15406 V->hasOneUse());
15407 };
15408 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15409 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15410 DAG.getBitcast(VT, N0.getOperand(0)),
15411 DAG.getBitcast(VT, N0.getOperand(1)));
15412 }
15413
15414 // fold (conv (load x)) -> (load (conv*)x)
15415 // If the resultant load doesn't need a higher alignment than the original!
15416 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15417 // Do not remove the cast if the types differ in endian layout.
15418 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15419 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15420 // If the load is volatile, we only want to change the load type if the
15421 // resulting load is legal. Otherwise we might increase the number of
15422 // memory accesses. We don't care if the original type was legal or not
15423 // as we assume software couldn't rely on the number of accesses of an
15424 // illegal type.
15425 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15426 TLI.isOperationLegal(ISD::LOAD, VT))) {
15427 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15428
15429 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15430 *LN0->getMemOperand())) {
15431 SDValue Load =
15432 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15433 LN0->getMemOperand());
15434 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15435 return Load;
15436 }
15437 }
15438
15439 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15440 return V;
15441
15442 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15443 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15444 //
15445 // For ppc_fp128:
15446 // fold (bitcast (fneg x)) ->
15447 // flipbit = signbit
15448 // (xor (bitcast x) (build_pair flipbit, flipbit))
15449 //
15450 // fold (bitcast (fabs x)) ->
15451 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15452 // (xor (bitcast x) (build_pair flipbit, flipbit))
15453 // This often reduces constant pool loads.
15454 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15455 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15456 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15457 !N0.getValueType().isVector()) {
15458 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15459 AddToWorklist(NewConv.getNode());
15460
15461 SDLoc DL(N);
15462 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15463 assert(VT.getSizeInBits() == 128);
15464 SDValue SignBit = DAG.getConstant(
15465 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15466 SDValue FlipBit;
15467 if (N0.getOpcode() == ISD::FNEG) {
15468 FlipBit = SignBit;
15469 AddToWorklist(FlipBit.getNode());
15470 } else {
15471 assert(N0.getOpcode() == ISD::FABS);
15472 SDValue Hi =
15473 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15474 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15475 SDLoc(NewConv)));
15476 AddToWorklist(Hi.getNode());
15477 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15478 AddToWorklist(FlipBit.getNode());
15479 }
15480 SDValue FlipBits =
15481 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15482 AddToWorklist(FlipBits.getNode());
15483 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15484 }
15485 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15486 if (N0.getOpcode() == ISD::FNEG)
15487 return DAG.getNode(ISD::XOR, DL, VT,
15488 NewConv, DAG.getConstant(SignBit, DL, VT));
15489 assert(N0.getOpcode() == ISD::FABS);
15490 return DAG.getNode(ISD::AND, DL, VT,
15491 NewConv, DAG.getConstant(~SignBit, DL, VT));
15492 }
15493
15494 // fold (bitconvert (fcopysign cst, x)) ->
15495 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15496 // Note that we don't handle (copysign x, cst) because this can always be
15497 // folded to an fneg or fabs.
15498 //
15499 // For ppc_fp128:
15500 // fold (bitcast (fcopysign cst, x)) ->
15501 // flipbit = (and (extract_element
15502 // (xor (bitcast cst), (bitcast x)), 0),
15503 // signbit)
15504 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15505 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15506 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15507 !VT.isVector()) {
15508 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15509 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15510 if (isTypeLegal(IntXVT)) {
15511 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15512 AddToWorklist(X.getNode());
15513
15514 // If X has a different width than the result/lhs, sext it or truncate it.
15515 unsigned VTWidth = VT.getSizeInBits();
15516 if (OrigXWidth < VTWidth) {
15517 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15518 AddToWorklist(X.getNode());
15519 } else if (OrigXWidth > VTWidth) {
15520 // To get the sign bit in the right place, we have to shift it right
15521 // before truncating.
15522 SDLoc DL(X);
15523 X = DAG.getNode(ISD::SRL, DL,
15524 X.getValueType(), X,
15525 DAG.getConstant(OrigXWidth-VTWidth, DL,
15526 X.getValueType()));
15527 AddToWorklist(X.getNode());
15528 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15529 AddToWorklist(X.getNode());
15530 }
15531
15532 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15533 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15534 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15535 AddToWorklist(Cst.getNode());
15536 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15537 AddToWorklist(X.getNode());
15538 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15539 AddToWorklist(XorResult.getNode());
15540 SDValue XorResult64 = DAG.getNode(
15541 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15542 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15543 SDLoc(XorResult)));
15544 AddToWorklist(XorResult64.getNode());
15545 SDValue FlipBit =
15546 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15547 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15548 AddToWorklist(FlipBit.getNode());
15549 SDValue FlipBits =
15550 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15551 AddToWorklist(FlipBits.getNode());
15552 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15553 }
15554 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15555 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15556 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15557 AddToWorklist(X.getNode());
15558
15559 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15560 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15561 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15562 AddToWorklist(Cst.getNode());
15563
15564 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15565 }
15566 }
15567
15568 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15569 if (N0.getOpcode() == ISD::BUILD_PAIR)
15570 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15571 return CombineLD;
15572
15573 // Remove double bitcasts from shuffles - this is often a legacy of
15574 // XformToShuffleWithZero being used to combine bitmaskings (of
15575 // float vectors bitcast to integer vectors) into shuffles.
15576 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
15577 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15578 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15579 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15580 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15581 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15582
15583 // If operands are a bitcast, peek through if it casts the original VT.
15584 // If operands are a constant, just bitcast back to original VT.
15585 auto PeekThroughBitcast = [&](SDValue Op) {
15586 if (Op.getOpcode() == ISD::BITCAST &&
15587 Op.getOperand(0).getValueType() == VT)
15588 return SDValue(Op.getOperand(0));
15589 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15590 return DAG.getBitcast(VT, Op);
15591 return SDValue();
15592 };
15593
15594 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15595 // the result type of this bitcast. This would eliminate at least one
15596 // bitcast. See the transform in InstCombine.
15597 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15598 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15599 if (!(SV0 && SV1))
15600 return SDValue();
15601
15602 int MaskScale =
15603 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15604 SmallVector<int, 8> NewMask;
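// For example (illustrative): with VT = v4i32 over a v2i64 shuffle,
// MaskScale is 2 and a mask of <1,0> is widened to <2,3,0,1>.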
15605 for (int M : SVN->getMask())
15606 for (int i = 0; i != MaskScale; ++i)
15607 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15608
15609 SDValue LegalShuffle =
15610 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15611 if (LegalShuffle)
15612 return LegalShuffle;
15613 }
15614
15615 return SDValue();
15616}
15617
15618SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15619 EVT VT = N->getValueType(0);
15620 return CombineConsecutiveLoads(N, VT);
15621}
15622
15623SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15624 SDValue N0 = N->getOperand(0);
15625
15626 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15627 return N0;
15628
15629 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15630 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15631 // example https://reviews.llvm.org/D136529#4120959.
15632 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15633 return SDValue();
15634
15635 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15636 // Try to push freeze through instructions that propagate but don't produce
15637 // poison as far as possible. If an operand of the freeze satisfies three
15638 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
15639 // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
15640 // or similar), then push the freeze through to the non-guaranteed operands.
15641 // NOTE: we will strip poison-generating flags, so ignore them here.
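// For example (illustrative): freeze (add nsw X, 1) can become
// (add (freeze X), 1) with the nsw flag dropped, since the constant operand
// is already guaranteed not to be poison.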
15642 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15643 /*ConsiderFlags*/ false) ||
15644 N0->getNumValues() != 1 || !N0->hasOneUse())
15645 return SDValue();
15646
15647 bool AllowMultipleMaybePoisonOperands =
15648 N0.getOpcode() == ISD::BUILD_VECTOR ||
15649 N0.getOpcode() == ISD::BUILD_PAIR ||
15650 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15651 N0.getOpcode() == ISD::CONCAT_VECTORS;
15652 
15653 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15654 // ones" or "constant" into something that depends on FrozenUndef. We can
15655 // instead pick concrete values for the undef elements to keep those
15656 // properties, while at the same time folding away the freeze.
15657 // If we implement a more general solution for folding away freeze(undef) in
15658 // the future, then this special handling can be removed.
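// For example (illustrative): freeze (build_vector 0, 0, undef, 0) is folded
// here to (build_vector 0, 0, 0, 0) instead of freezing the undef element.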
15659 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15660 SDLoc DL(N0);
15661 EVT VT = N0.getValueType();
15662 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15663 return DAG.getAllOnesConstant(DL, VT);
15664 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15665 SmallVector<SDValue, 8> NewVecC;
15666 for (const SDValue &Op : N0->op_values())
15667 NewVecC.push_back(
15668 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15669 return DAG.getBuildVector(VT, DL, NewVecC);
15670 }
15671 }
15672
15673 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15674 for (SDValue Op : N0->ops()) {
15675 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15676 /*Depth*/ 1))
15677 continue;
15678 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15679 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15680 if (!HadMaybePoisonOperands)
15681 continue;
15682 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15683 // Multiple maybe-poison ops when not allowed - bail out.
15684 return SDValue();
15685 }
15686 }
15687 // NOTE: the whole op may still not be guaranteed not to be undef or poison,
15688 // because it could create undef or poison due to its poison-generating flags.
15689 // So not finding any maybe-poison operands is fine.
15690
15691 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15692 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15693 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15694 continue;
15695 // First, freeze each offending operand.
15696 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15697 // Then, change all other uses of unfrozen operand to use frozen operand.
15698 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15699 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15700 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15701 // But, that also updated the use in the freeze we just created, thus
15702 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15703 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15704 MaybePoisonOperand);
15705 }
15706 }
15707
15708 // This node has been merged with another.
15709 if (N->getOpcode() == ISD::DELETED_NODE)
15710 return SDValue(N, 0);
15711
15712 // The whole node may have been updated, so the value we were holding
15713 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15714 N0 = N->getOperand(0);
15715
15716 // Finally, recreate the node; its operands were updated to use
15717 // frozen operands, so we just need to use its "original" operands.
15718 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15719 // Special-handle ISD::UNDEF: each single one of them can be its own thing.
15720 for (SDValue &Op : Ops) {
15721 if (Op.getOpcode() == ISD::UNDEF)
15722 Op = DAG.getFreeze(Op);
15723 }
15724
15725 SDValue R;
15726 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15727 // Special case handling for ShuffleVectorSDNode nodes.
15728 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15729 SVN->getMask());
15730 } else {
15731 // NOTE: this strips poison generating flags.
15732 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15733 }
15734 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15735 "Can't create node that may be undef/poison!");
15736 return R;
15737}
15738
15739/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15740/// operands. DstEltVT indicates the destination element value type.
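/// For example (illustrative, little-endian): bitcasting the constant vector
/// (v1i64 build_vector 0x0000000200000001) to v2i32 yields
/// (v2i32 build_vector 1, 2), splitting the raw bits per destination element.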
15741SDValue DAGCombiner::
15742ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15743 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15744
15745 // If this is already the right type, we're done.
15746 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15747
15748 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15749 unsigned DstBitSize = DstEltVT.getSizeInBits();
15750
15751 // If this is a conversion of N elements of one type to N elements of another
15752 // type, convert each element. This handles FP<->INT cases.
15753 if (SrcBitSize == DstBitSize) {
15754 SmallVector<SDValue, 8> Ops;
15755 for (SDValue Op : BV->op_values()) {
15756 // If the vector element type is not legal, the BUILD_VECTOR operands
15757 // are promoted and implicitly truncated. Make that explicit here.
15758 if (Op.getValueType() != SrcEltVT)
15759 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15760 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15761 AddToWorklist(Ops.back().getNode());
15762 }
15763 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15764 BV->getValueType(0).getVectorNumElements());
15765 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15766 }
15767
15768 // Otherwise, we're growing or shrinking the elements. To avoid having to
15769 // handle annoying details of growing/shrinking FP values, we convert them to
15770 // int first.
15771 if (SrcEltVT.isFloatingPoint()) {
15772 // Convert the input float vector to an int vector whose elements are the
15773 // same size.
15774 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15775 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15776 SrcEltVT = IntVT;
15777 }
15778
15779 // Now we know the input is an integer vector. If the output is an FP type,
15780 // convert to integer first, then to FP of the right size.
15781 if (DstEltVT.isFloatingPoint()) {
15782 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15783 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15784
15785 // Next, convert to FP elements of the same size.
15786 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15787 }
15788
15789 // Okay, we know the src/dst types are both integers of differing types.
15790 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15791
15792 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15793 // BuildVectorSDNode?
15794 auto *BVN = cast<BuildVectorSDNode>(BV);
15795
15796 // Extract the constant raw bit data.
15797 BitVector UndefElements;
15798 SmallVector<APInt> RawBits;
15799 bool IsLE = DAG.getDataLayout().isLittleEndian();
15800 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15801 return SDValue();
15802
15803 SDLoc DL(BV);
15804 SmallVector<SDValue, 8> Ops;
15805 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15806 if (UndefElements[I])
15807 Ops.push_back(DAG.getUNDEF(DstEltVT));
15808 else
15809 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15810 }
15811
15812 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15813 return DAG.getBuildVector(VT, DL, Ops);
15814}
15815
15816// Returns true if floating point contraction is allowed on the FMUL-SDValue
15817// `N`
15818 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15819 assert(N.getOpcode() == ISD::FMUL);
15820
15821 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15822 N->getFlags().hasAllowContract();
15823}
15824
15825// Returns true if `N` can assume no infinities involved in its computation.
15826 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15827 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15828}
15829
15830/// Try to perform FMA combining on a given FADD node.
15831template <class MatchContextClass>
15832SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15833 SDValue N0 = N->getOperand(0);
15834 SDValue N1 = N->getOperand(1);
15835 EVT VT = N->getValueType(0);
15836 SDLoc SL(N);
15837 MatchContextClass matcher(DAG, TLI, N);
15838 const TargetOptions &Options = DAG.getTarget().Options;
15839
15840 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15841
15842 // Floating-point multiply-add with intermediate rounding.
15843 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15844 // FIXME: Add VP_FMAD opcode.
15845 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15846
15847 // Floating-point multiply-add without intermediate rounding.
15848 bool HasFMA =
15849 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15850 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15851
15852 // No valid opcode, do not combine.
15853 if (!HasFMAD && !HasFMA)
15854 return SDValue();
15855
15856 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15857 Options.UnsafeFPMath || HasFMAD);
15858 // If the addition is not contractable, do not combine.
15859 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15860 return SDValue();
15861
15862 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15863 // beneficial. It does not reduce latency. It increases register pressure. It
15864 // replaces an fadd with an fma which is a more complex instruction, so is
15865 // likely to have a larger encoding, use more functional units, etc.
15866 if (N0 == N1)
15867 return SDValue();
15868
15869 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15870 return SDValue();
15871
15872 // Always prefer FMAD to FMA for precision.
15873 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15874 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15875 
15876 auto isFusedOp = [&](SDValue N) {
15877 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15878 };
15879
15880 // Is the node an FMUL and contractable either due to global flags or
15881 // SDNodeFlags.
15882 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15883 if (!matcher.match(N, ISD::FMUL))
15884 return false;
15885 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15886 };
15887 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15888 // prefer to fold the multiply with fewer uses.
15889 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
15890 if (N0->use_size() > N1->use_size())
15891 std::swap(N0, N1);
15892 }
15893
15894 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15895 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15896 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15897 N0.getOperand(1), N1);
15898 }
15899
15900 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15901 // Note: Commutes FADD operands.
15902 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15903 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15904 N1.getOperand(1), N0);
15905 }
15906
15907 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15908 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15909 // This also works with nested fma instructions:
15910 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15911 // fma A, B, (fma C, D, fma (E, F, G))
15912 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15913 // fma A, B, (fma C, D, fma (E, F, G)).
15914 // This requires reassociation because it changes the order of operations.
15915 bool CanReassociate =
15916 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15917 if (CanReassociate) {
15918 SDValue FMA, E;
15919 if (isFusedOp(N0) && N0.hasOneUse()) {
15920 FMA = N0;
15921 E = N1;
15922 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15923 FMA = N1;
15924 E = N0;
15925 }
15926
15927 SDValue TmpFMA = FMA;
15928 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15929 SDValue FMul = TmpFMA->getOperand(2);
15930 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15931 SDValue C = FMul.getOperand(0);
15932 SDValue D = FMul.getOperand(1);
15933 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15934 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15935 // Replacing the inner FMul could cause the outer FMA to be simplified
15936 // away.
15937 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15938 }
15939
15940 TmpFMA = TmpFMA->getOperand(2);
15941 }
15942 }
15943
15944 // Look through FP_EXTEND nodes to do more combining.
15945
15946 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15947 if (matcher.match(N0, ISD::FP_EXTEND)) {
15948 SDValue N00 = N0.getOperand(0);
15949 if (isContractableFMUL(N00) &&
15950 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15951 N00.getValueType())) {
15952 return matcher.getNode(
15953 PreferredFusedOpcode, SL, VT,
15954 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15955 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15956 }
15957 }
15958
15959 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15960 // Note: Commutes FADD operands.
15961 if (matcher.match(N1, ISD::FP_EXTEND)) {
15962 SDValue N10 = N1.getOperand(0);
15963 if (isContractableFMUL(N10) &&
15964 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15965 N10.getValueType())) {
15966 return matcher.getNode(
15967 PreferredFusedOpcode, SL, VT,
15968 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15969 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15970 }
15971 }
15972
15973 // More folding opportunities when target permits.
15974 if (Aggressive) {
15975 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15976 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15977 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15978 SDValue Z) {
15979 return matcher.getNode(
15980 PreferredFusedOpcode, SL, VT, X, Y,
15981 matcher.getNode(PreferredFusedOpcode, SL, VT,
15982 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15983 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15984 };
15985 if (isFusedOp(N0)) {
15986 SDValue N02 = N0.getOperand(2);
15987 if (matcher.match(N02, ISD::FP_EXTEND)) {
15988 SDValue N020 = N02.getOperand(0);
15989 if (isContractableFMUL(N020) &&
15990 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15991 N020.getValueType())) {
15992 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15993 N020.getOperand(0), N020.getOperand(1),
15994 N1);
15995 }
15996 }
15997 }
15998
15999 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16000 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16001 // FIXME: This turns two single-precision and one double-precision
16002 // operation into two double-precision operations, which might not be
16003 // interesting for all targets, especially GPUs.
16004 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16005 SDValue Z) {
16006 return matcher.getNode(
16007 PreferredFusedOpcode, SL, VT,
16008 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16009 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16010 matcher.getNode(PreferredFusedOpcode, SL, VT,
16011 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16012 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16013 };
16014 if (N0.getOpcode() == ISD::FP_EXTEND) {
16015 SDValue N00 = N0.getOperand(0);
16016 if (isFusedOp(N00)) {
16017 SDValue N002 = N00.getOperand(2);
16018 if (isContractableFMUL(N002) &&
16019 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16020 N00.getValueType())) {
16021 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16022 N002.getOperand(0), N002.getOperand(1),
16023 N1);
16024 }
16025 }
16026 }
16027
16028 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16029 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16030 if (isFusedOp(N1)) {
16031 SDValue N12 = N1.getOperand(2);
16032 if (N12.getOpcode() == ISD::FP_EXTEND) {
16033 SDValue N120 = N12.getOperand(0);
16034 if (isContractableFMUL(N120) &&
16035 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16036 N120.getValueType())) {
16037 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16038 N120.getOperand(0), N120.getOperand(1),
16039 N0);
16040 }
16041 }
16042 }
16043
16044 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16045 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16046 // FIXME: This turns two single-precision and one double-precision
16047 // operation into two double-precision operations, which might not be
16048 // interesting for all targets, especially GPUs.
16049 if (N1.getOpcode() == ISD::FP_EXTEND) {
16050 SDValue N10 = N1.getOperand(0);
16051 if (isFusedOp(N10)) {
16052 SDValue N102 = N10.getOperand(2);
16053 if (isContractableFMUL(N102) &&
16054 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16055 N10.getValueType())) {
16056 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16057 N102.getOperand(0), N102.getOperand(1),
16058 N0);
16059 }
16060 }
16061 }
16062 }
16063
16064 return SDValue();
16065}
16066
16067/// Try to perform FMA combining on a given FSUB node.
16068template <class MatchContextClass>
16069SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16070 SDValue N0 = N->getOperand(0);
16071 SDValue N1 = N->getOperand(1);
16072 EVT VT = N->getValueType(0);
16073 SDLoc SL(N);
16074 MatchContextClass matcher(DAG, TLI, N);
16075 const TargetOptions &Options = DAG.getTarget().Options;
16076
16077 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16078
16079 // Floating-point multiply-add with intermediate rounding.
16080 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16081 // FIXME: Add VP_FMAD opcode.
16082 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16083
16084 // Floating-point multiply-add without intermediate rounding.
16085 bool HasFMA =
16086 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16087 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
16088
16089 // No valid opcode, do not combine.
16090 if (!HasFMAD && !HasFMA)
16091 return SDValue();
16092
16093 const SDNodeFlags Flags = N->getFlags();
16094 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16095 Options.UnsafeFPMath || HasFMAD);
16096
16097 // If the subtraction is not contractable, do not combine.
16098 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16099 return SDValue();
16100
16101 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16102 return SDValue();
16103
16104 // Always prefer FMAD to FMA for precision.
16105 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16106 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16107 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16108
16109 // Is the node an FMUL and contractable either due to global flags or
16110 // SDNodeFlags.
16111 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16112 if (!matcher.match(N, ISD::FMUL))
16113 return false;
16114 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16115 };
16116
16117 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16118 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16119 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16120 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16121 XY.getOperand(1),
16122 matcher.getNode(ISD::FNEG, SL, VT, Z));
16123 }
16124 return SDValue();
16125 };
16126
16127 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16128 // Note: Commutes FSUB operands.
16129 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16130 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16131 return matcher.getNode(
16132 PreferredFusedOpcode, SL, VT,
16133 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16134 YZ.getOperand(1), X);
16135 }
16136 return SDValue();
16137 };
16138
16139 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16140 // prefer to fold the multiply with fewer uses.
16141 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16142 (N0->use_size() > N1->use_size())) {
16143 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16144 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16145 return V;
16146 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16147 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16148 return V;
16149 } else {
16150 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16151 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16152 return V;
16153 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16154 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16155 return V;
16156 }
16157
16158 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16159 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16160 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16161 SDValue N00 = N0.getOperand(0).getOperand(0);
16162 SDValue N01 = N0.getOperand(0).getOperand(1);
16163 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16164 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16165 matcher.getNode(ISD::FNEG, SL, VT, N1));
16166 }
16167
16168 // Look through FP_EXTEND nodes to do more combining.
16169
16170 // fold (fsub (fpext (fmul x, y)), z)
16171 // -> (fma (fpext x), (fpext y), (fneg z))
16172 if (matcher.match(N0, ISD::FP_EXTEND)) {
16173 SDValue N00 = N0.getOperand(0);
16174 if (isContractableFMUL(N00) &&
16175 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16176 N00.getValueType())) {
16177 return matcher.getNode(
16178 PreferredFusedOpcode, SL, VT,
16179 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16180 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16181 matcher.getNode(ISD::FNEG, SL, VT, N1));
16182 }
16183 }
16184
16185 // fold (fsub x, (fpext (fmul y, z)))
16186 // -> (fma (fneg (fpext y)), (fpext z), x)
16187 // Note: Commutes FSUB operands.
16188 if (matcher.match(N1, ISD::FP_EXTEND)) {
16189 SDValue N10 = N1.getOperand(0);
16190 if (isContractableFMUL(N10) &&
16191 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16192 N10.getValueType())) {
16193 return matcher.getNode(
16194 PreferredFusedOpcode, SL, VT,
16195 matcher.getNode(
16196 ISD::FNEG, SL, VT,
16197 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16198 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16199 }
16200 }
16201
16202 // fold (fsub (fpext (fneg (fmul x, y))), z)
16203 // -> (fneg (fma (fpext x), (fpext y), z))
16204 // Note: This could be removed with appropriate canonicalization of the
16205 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16206 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16207 // from implementing the canonicalization in visitFSUB.
16208 if (matcher.match(N0, ISD::FP_EXTEND)) {
16209 SDValue N00 = N0.getOperand(0);
16210 if (matcher.match(N00, ISD::FNEG)) {
16211 SDValue N000 = N00.getOperand(0);
16212 if (isContractableFMUL(N000) &&
16213 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16214 N00.getValueType())) {
16215 return matcher.getNode(
16216 ISD::FNEG, SL, VT,
16217 matcher.getNode(
16218 PreferredFusedOpcode, SL, VT,
16219 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16220 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16221 N1));
16222 }
16223 }
16224 }
16225
16226 // fold (fsub (fneg (fpext (fmul x, y))), z)
16227 // -> (fneg (fma (fpext x), (fpext y), z))
16228 // Note: This could be removed with appropriate canonicalization of the
16229 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16230 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16231 // from implementing the canonicalization in visitFSUB.
16232 if (matcher.match(N0, ISD::FNEG)) {
16233 SDValue N00 = N0.getOperand(0);
16234 if (matcher.match(N00, ISD::FP_EXTEND)) {
16235 SDValue N000 = N00.getOperand(0);
16236 if (isContractableFMUL(N000) &&
16237 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16238 N000.getValueType())) {
16239 return matcher.getNode(
16240 ISD::FNEG, SL, VT,
16241 matcher.getNode(
16242 PreferredFusedOpcode, SL, VT,
16243 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16244 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16245 N1));
16246 }
16247 }
16248 }
16249
16250 auto isReassociable = [&Options](SDNode *N) {
16251 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16252 };
16253
16254 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16255 &isReassociable](SDValue N) {
16256 return isContractableFMUL(N) && isReassociable(N.getNode());
16257 };
16258
16259 auto isFusedOp = [&](SDValue N) {
16260 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16261 };
16262
16263 // More folding opportunities when target permits.
16264 if (Aggressive && isReassociable(N)) {
16265 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16266 // fold (fsub (fma x, y, (fmul u, v)), z)
16267 // -> (fma x, y (fma u, v, (fneg z)))
16268 if (CanFuse && isFusedOp(N0) &&
16269 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16270 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16271 return matcher.getNode(
16272 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16273 matcher.getNode(PreferredFusedOpcode, SL, VT,
16274 N0.getOperand(2).getOperand(0),
16275 N0.getOperand(2).getOperand(1),
16276 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16277 }
16278
16279 // fold (fsub x, (fma y, z, (fmul u, v)))
16280 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16281 if (CanFuse && isFusedOp(N1) &&
16282 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16283 N1->hasOneUse() && NoSignedZero) {
16284 SDValue N20 = N1.getOperand(2).getOperand(0);
16285 SDValue N21 = N1.getOperand(2).getOperand(1);
16286 return matcher.getNode(
16287 PreferredFusedOpcode, SL, VT,
16288 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16289 N1.getOperand(1),
16290 matcher.getNode(PreferredFusedOpcode, SL, VT,
16291 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16292 }
16293
16294 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16295 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16296 if (isFusedOp(N0) && N0->hasOneUse()) {
16297 SDValue N02 = N0.getOperand(2);
16298 if (matcher.match(N02, ISD::FP_EXTEND)) {
16299 SDValue N020 = N02.getOperand(0);
16300 if (isContractableAndReassociableFMUL(N020) &&
16301 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16302 N020.getValueType())) {
16303 return matcher.getNode(
16304 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16305 matcher.getNode(
16306 PreferredFusedOpcode, SL, VT,
16307 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16308 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16309 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16310 }
16311 }
16312 }
16313
16314 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16315 // -> (fma (fpext x), (fpext y),
16316 // (fma (fpext u), (fpext v), (fneg z)))
16317 // FIXME: This turns two single-precision and one double-precision
16318 // operation into two double-precision operations, which might not be
16319 // interesting for all targets, especially GPUs.
16320 if (matcher.match(N0, ISD::FP_EXTEND)) {
16321 SDValue N00 = N0.getOperand(0);
16322 if (isFusedOp(N00)) {
16323 SDValue N002 = N00.getOperand(2);
16324 if (isContractableAndReassociableFMUL(N002) &&
16325 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16326 N00.getValueType())) {
16327 return matcher.getNode(
16328 PreferredFusedOpcode, SL, VT,
16329 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16330 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16331 matcher.getNode(
16332 PreferredFusedOpcode, SL, VT,
16333 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16334 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16335 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16336 }
16337 }
16338 }
16339
16340 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16341 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16342 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16343 N1->hasOneUse()) {
16344 SDValue N120 = N1.getOperand(2).getOperand(0);
16345 if (isContractableAndReassociableFMUL(N120) &&
16346 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16347 N120.getValueType())) {
16348 SDValue N1200 = N120.getOperand(0);
16349 SDValue N1201 = N120.getOperand(1);
16350 return matcher.getNode(
16351 PreferredFusedOpcode, SL, VT,
16352 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16353 N1.getOperand(1),
16354 matcher.getNode(
16355 PreferredFusedOpcode, SL, VT,
16356 matcher.getNode(ISD::FNEG, SL, VT,
16357 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16358 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16359 }
16360 }
16361
16362 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16363 // -> (fma (fneg (fpext y)), (fpext z),
16364 // (fma (fneg (fpext u)), (fpext v), x))
16365 // FIXME: This turns two single-precision and one double-precision
16366 // operation into two double-precision operations, which might not be
16367 // interesting for all targets, especially GPUs.
16368 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16369 SDValue CvtSrc = N1.getOperand(0);
16370 SDValue N100 = CvtSrc.getOperand(0);
16371 SDValue N101 = CvtSrc.getOperand(1);
16372 SDValue N102 = CvtSrc.getOperand(2);
16373 if (isContractableAndReassociableFMUL(N102) &&
16374 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16375 CvtSrc.getValueType())) {
16376 SDValue N1020 = N102.getOperand(0);
16377 SDValue N1021 = N102.getOperand(1);
16378 return matcher.getNode(
16379 PreferredFusedOpcode, SL, VT,
16380 matcher.getNode(ISD::FNEG, SL, VT,
16381 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16382 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16383 matcher.getNode(
16384 PreferredFusedOpcode, SL, VT,
16385 matcher.getNode(ISD::FNEG, SL, VT,
16386 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16387 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16388 }
16389 }
16390 }
16391
16392 return SDValue();
16393}
16394
16395/// Try to perform FMA combining on a given FMUL node based on the distributive
16396/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16397/// subtraction instead of addition).
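/// For example (illustrative): (fmul (fadd x, 1.0), y) can become
/// (fma x, y, y), folding the +1.0 into the addend when an FMA is available
/// and the required contraction conditions hold.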
16398SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16399 SDValue N0 = N->getOperand(0);
16400 SDValue N1 = N->getOperand(1);
16401 EVT VT = N->getValueType(0);
16402 SDLoc SL(N);
16403
16404 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16405
16406 const TargetOptions &Options = DAG.getTarget().Options;
16407
16408 // The transforms below are incorrect when x == 0 and y == inf, because the
16409 // intermediate multiplication produces a nan.
16410 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16411 if (!hasNoInfs(Options, FAdd))
16412 return SDValue();
16413
16414 // Floating-point multiply-add without intermediate rounding.
16415 bool HasFMA =
16416 isContractableFMUL(Options, SDValue(N, 0)) &&
16417 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16418 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16419
16420 // Floating-point multiply-add with intermediate rounding. This can result
16421 // in a less precise result due to the changed rounding order.
16422 bool HasFMAD = Options.UnsafeFPMath &&
16423 (LegalOperations && TLI.isFMADLegal(DAG, N));
16424
16425 // No valid opcode, do not combine.
16426 if (!HasFMAD && !HasFMA)
16427 return SDValue();
16428
16429 // Always prefer FMAD to FMA for precision.
16430 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16431 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16432 
16433 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16434 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16435 auto FuseFADD = [&](SDValue X, SDValue Y) {
16436 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16437 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16438 if (C->isExactlyValue(+1.0))
16439 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16440 Y);
16441 if (C->isExactlyValue(-1.0))
16442 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16443 DAG.getNode(ISD::FNEG, SL, VT, Y));
16444 }
16445 }
16446 return SDValue();
16447 };
16448
16449 if (SDValue FMA = FuseFADD(N0, N1))
16450 return FMA;
16451 if (SDValue FMA = FuseFADD(N1, N0))
16452 return FMA;
16453
16454 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16455 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16456 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16457 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16458 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16459 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16460 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16461 if (C0->isExactlyValue(+1.0))
16462 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16463 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16464 Y);
16465 if (C0->isExactlyValue(-1.0))
16466 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16467 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16468 DAG.getNode(ISD::FNEG, SL, VT, Y));
16469 }
16470 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16471 if (C1->isExactlyValue(+1.0))
16472 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16473 DAG.getNode(ISD::FNEG, SL, VT, Y));
16474 if (C1->isExactlyValue(-1.0))
16475 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16476 Y);
16477 }
16478 }
16479 return SDValue();
16480 };
16481
16482 if (SDValue FMA = FuseFSUB(N0, N1))
16483 return FMA;
16484 if (SDValue FMA = FuseFSUB(N1, N0))
16485 return FMA;
16486
16487 return SDValue();
16488}
16489
16490SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16491 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16492
16493 // FADD -> FMA combines:
16494 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16495 if (Fused.getOpcode() != ISD::DELETED_NODE)
16496 AddToWorklist(Fused.getNode());
16497 return Fused;
16498 }
16499 return SDValue();
16500}
16501
16502SDValue DAGCombiner::visitFADD(SDNode *N) {
16503 SDValue N0 = N->getOperand(0);
16504 SDValue N1 = N->getOperand(1);
16505 SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16506 SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16507 EVT VT = N->getValueType(0);
16508 SDLoc DL(N);
16509 const TargetOptions &Options = DAG.getTarget().Options;
16510 SDNodeFlags Flags = N->getFlags();
16511 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16512
16513 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16514 return R;
16515
16516 // fold (fadd c1, c2) -> c1 + c2
16517 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16518 return C;
16519
16520 // canonicalize constant to RHS
16521 if (N0CFP && !N1CFP)
16522 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16523
16524 // fold vector ops
16525 if (VT.isVector())
16526 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16527 return FoldedVOp;
16528
16529 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16530 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16531 if (N1C && N1C->isZero())
16532 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16533 return N0;
16534
16535 if (SDValue NewSel = foldBinOpIntoSelect(N))
16536 return NewSel;
16537
16538 // fold (fadd A, (fneg B)) -> (fsub A, B)
16539 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16540 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16541 N1, DAG, LegalOperations, ForCodeSize))
16542 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16543
16544 // fold (fadd (fneg A), B) -> (fsub B, A)
16545 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16546 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16547 N0, DAG, LegalOperations, ForCodeSize))
16548 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16549
16550 auto isFMulNegTwo = [](SDValue FMul) {
16551 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16552 return false;
16553 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16554 return C && C->isExactlyValue(-2.0);
16555 };
16556
16557 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16558 if (isFMulNegTwo(N0)) {
16559 SDValue B = N0.getOperand(0);
16560 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16561 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16562 }
16563 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16564 if (isFMulNegTwo(N1)) {
16565 SDValue B = N1.getOperand(0);
16566 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16567 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16568 }
16569
16570 // No FP constant should be created after legalization as the Instruction
16571 // Selection pass has a hard time dealing with FP constants.
16572 bool AllowNewConst = (Level < AfterLegalizeDAG);
16573
16574 // If nnan is enabled, fold lots of things.
16575 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16576 // If allowed, fold (fadd (fneg x), x) -> 0.0
16577 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16578 return DAG.getConstantFP(0.0, DL, VT);
16579
16580 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16581 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16582 return DAG.getConstantFP(0.0, DL, VT);
16583 }
16584
16585 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16586 // TODO: break out portions of the transformations below for which Unsafe is
16587 // considered and which do not require both nsz and reassoc
16588 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16589 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16590 AllowNewConst) {
16591 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16592 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16593 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16594 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16595 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16596 }
16597
16598 // We can fold chains of FADD's of the same value into multiplications.
16599 // This transform is not safe in general because we are reducing the number
16600 // of rounding steps.
16601 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16602 if (N0.getOpcode() == ISD::FMUL) {
16603 SDNode *CFP00 =
16604 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16605 SDNode *CFP01 =
16606 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16607 
16608 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16609 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16610 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16611 DAG.getConstantFP(1.0, DL, VT));
16612 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16613 }
16614
16615 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16616 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16617 N1.getOperand(0) == N1.getOperand(1) &&
16618 N0.getOperand(0) == N1.getOperand(0)) {
16619 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16620 DAG.getConstantFP(2.0, DL, VT));
16621 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16622 }
16623 }
16624
16625 if (N1.getOpcode() == ISD::FMUL) {
16626 SDNode *CFP10 =
16627 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16628 SDNode *CFP11 =
16629 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16630 
16631 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16632 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16633 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16634 DAG.getConstantFP(1.0, DL, VT));
16635 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16636 }
16637
16638 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16639 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16640 N0.getOperand(0) == N0.getOperand(1) &&
16641 N1.getOperand(0) == N0.getOperand(0)) {
16642 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16643 DAG.getConstantFP(2.0, DL, VT));
16644 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16645 }
16646 }
16647
16648 if (N0.getOpcode() == ISD::FADD) {
16649 SDNode *CFP00 =
16650 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16651 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16652 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16653 (N0.getOperand(0) == N1)) {
16654 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16655 DAG.getConstantFP(3.0, DL, VT));
16656 }
16657 }
16658
16659 if (N1.getOpcode() == ISD::FADD) {
16660 SDNode *CFP10 =
16661 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16662 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16663 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16664 N1.getOperand(0) == N0) {
16665 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16666 DAG.getConstantFP(3.0, DL, VT));
16667 }
16668 }
16669
16670 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16671 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16672 N0.getOperand(0) == N0.getOperand(1) &&
16673 N1.getOperand(0) == N1.getOperand(1) &&
16674 N0.getOperand(0) == N1.getOperand(0)) {
16675 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16676 DAG.getConstantFP(4.0, DL, VT));
16677 }
16678 }
16679
16680 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16681 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16682 VT, N0, N1, Flags))
16683 return SD;
16684 } // enable-unsafe-fp-math
16685
16686 // FADD -> FMA combines:
16687 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16688 if (Fused.getOpcode() != ISD::DELETED_NODE)
16689 AddToWorklist(Fused.getNode());
16690 return Fused;
16691 }
16692 return SDValue();
16693}
16694
16695SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16696 SDValue Chain = N->getOperand(0);
16697 SDValue N0 = N->getOperand(1);
16698 SDValue N1 = N->getOperand(2);
16699 EVT VT = N->getValueType(0);
16700 EVT ChainVT = N->getValueType(1);
16701 SDLoc DL(N);
16702 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16703
16704 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16705 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16706 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16707 N1, DAG, LegalOperations, ForCodeSize)) {
16708 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16709 {Chain, N0, NegN1});
16710 }
16711
16712 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16713 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16714 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16715 N0, DAG, LegalOperations, ForCodeSize)) {
16716 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16717 {Chain, N1, NegN0});
16718 }
16719 return SDValue();
16720}
16721
16722SDValue DAGCombiner::visitFSUB(SDNode *N) {
16723 SDValue N0 = N->getOperand(0);
16724 SDValue N1 = N->getOperand(1);
16725 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16726 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16727 EVT VT = N->getValueType(0);
16728 SDLoc DL(N);
16729 const TargetOptions &Options = DAG.getTarget().Options;
16730 const SDNodeFlags Flags = N->getFlags();
16731 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16732
16733 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16734 return R;
16735
16736 // fold (fsub c1, c2) -> c1-c2
16737 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16738 return C;
16739
16740 // fold vector ops
16741 if (VT.isVector())
16742 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16743 return FoldedVOp;
16744
16745 if (SDValue NewSel = foldBinOpIntoSelect(N))
16746 return NewSel;
16747
16748 // (fsub A, 0) -> A
16749 if (N1CFP && N1CFP->isZero()) {
16750 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16751 Flags.hasNoSignedZeros()) {
16752 return N0;
16753 }
16754 }
16755
16756 if (N0 == N1) {
16757 // (fsub x, x) -> 0.0
16758 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16759 return DAG.getConstantFP(0.0f, DL, VT);
16760 }
16761
16762 // (fsub -0.0, N1) -> -N1
16763 if (N0CFP && N0CFP->isZero()) {
16764 if (N0CFP->isNegative() ||
16765 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16766 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16767 // flushed to zero, unless all users treat denorms as zero (DAZ).
16768 // FIXME: This transform will change the sign of a NaN and the behavior
16769 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16770 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16771 if (DenormMode == DenormalMode::getIEEE()) {
16772 if (SDValue NegN1 =
16773 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16774 return NegN1;
16775 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16776 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16777 }
16778 }
16779 }
16780
16781 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16782 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16783 N1.getOpcode() == ISD::FADD) {
16784 // X - (X + Y) -> -Y
16785 if (N0 == N1->getOperand(0))
16786 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16787 // X - (Y + X) -> -Y
16788 if (N0 == N1->getOperand(1))
16789 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16790 }
16791
16792 // fold (fsub A, (fneg B)) -> (fadd A, B)
16793 if (SDValue NegN1 =
16794 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16795 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16796
16797 // FSUB -> FMA combines:
16798 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16799 AddToWorklist(Fused.getNode());
16800 return Fused;
16801 }
16802
16803 return SDValue();
16804}
16805
16806// Transform IEEE Floats:
16807// (fmul C, (uitofp Pow2))
16808// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16809// (fdiv C, (uitofp Pow2))
16810// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16811//
16812 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
16813 // there is no need for more than an add/sub.
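// For example (illustrative, f32): 3.0f has bit pattern 0x40400000; multiplying
// by 8.0 (2^3) adds 3 << 23 = 0x01800000 to the integer view, giving
// 0x41C00000 == 24.0f, i.e. only the exponent field changed.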
16814//
16815// This is valid under the following circumstances:
16816// 1) We are dealing with IEEE floats
16817// 2) C is normal
16818// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16819 // TODO: Much of this could also be used for generating `ldexp` on targets that
16820 // prefer it.
16821SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16822 EVT VT = N->getValueType(0);
16823 SDValue ConstOp, Pow2Op;
16824
16825 std::optional<int> Mantissa;
16826 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16827 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16828 return false;
16829
16830 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16831 Pow2Op = N->getOperand(1 - ConstOpIdx);
16832 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16833 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16834 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16835 return false;
16836
16837 Pow2Op = Pow2Op.getOperand(0);
16838
16839 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16840 // TODO: We could use knownbits to make this bound more precise.
16841 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16842
16843 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16844 if (CFP == nullptr)
16845 return false;
16846
16847 const APFloat &APF = CFP->getValueAPF();
16848
16849 // Make sure we have a normal, IEEE-format constant.
16850 if (!APF.isNormal() || !APF.isIEEE())
16851 return false;
16852
16853 // Make sure the float's exponent is within the bounds for which this transform
16854 // produces a bitwise-equal value.
16855 int CurExp = ilogb(APF);
16856 // FMul by pow2 will only increase exponent.
16857 int MinExp =
16858 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16859 // FDiv by pow2 will only decrease exponent.
16860 int MaxExp =
16861 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16862 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16863 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16864 return false;
16865
16866 // Finally make sure we actually know the mantissa for the float type.
16867 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16868 if (!Mantissa)
16869 Mantissa = ThisMantissa;
16870
16871 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16872 };
16873
16874 // TODO: We may be able to include undefs.
16875 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16876 };
16877
16878 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16879 return SDValue();
16880
16881 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16882 return SDValue();
16883
16884 // Get log2 after all other checks have taken place. This is because
16885 // BuildLogBase2 may create a new node.
16886 SDLoc DL(N);
16887 // Get Log2 type with same bitwidth as the float type (VT).
16888 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16889 if (VT.isVector())
16890 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16891 VT.getVectorElementCount());
16892 
16893 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16894 /*InexpensiveOnly*/ true, NewIntVT);
16895 if (!Log2)
16896 return SDValue();
16897
16898 // Perform actual transform.
16899 SDValue MantissaShiftCnt =
16900 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16901 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16902 // `(X << C1) + (C << C1)`, but that isn't always the case here because of the
16903 // cast. We could handle the casts here to enable that fold.
16904 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16905 SDValue ResAsInt =
16906 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16907 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16908 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16909 return ResAsFP;
16910}
16911
16912SDValue DAGCombiner::visitFMUL(SDNode *N) {
16913 SDValue N0 = N->getOperand(0);
16914 SDValue N1 = N->getOperand(1);
16915 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16916 EVT VT = N->getValueType(0);
16917 SDLoc DL(N);
16918 const TargetOptions &Options = DAG.getTarget().Options;
16919 const SDNodeFlags Flags = N->getFlags();
16920 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16921
16922 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16923 return R;
16924
16925 // fold (fmul c1, c2) -> c1*c2
16926 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16927 return C;
16928
16929 // canonicalize constant to RHS
16930 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16931 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16932 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16933
16934 // fold vector ops
16935 if (VT.isVector())
16936 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16937 return FoldedVOp;
16938
16939 if (SDValue NewSel = foldBinOpIntoSelect(N))
16940 return NewSel;
16941
16942 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16943 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16944 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16945 N0.getOpcode() == ISD::FMUL) {
16946 SDValue N00 = N0.getOperand(0);
16947 SDValue N01 = N0.getOperand(1);
16948 // Avoid an infinite loop by making sure that N00 is not a constant
16949 // (the inner multiply has not been constant folded yet).
16950 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16951 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16952 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16953 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16954 }
16955 }
16956
16957 // Match a special-case: we convert X * 2.0 into fadd.
16958 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16959 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16960 N0.getOperand(0) == N0.getOperand(1)) {
16961 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16962 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16963 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16964 }
16965
16966 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16967 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16968 VT, N0, N1, Flags))
16969 return SD;
16970 }
16971
16972 // fold (fmul X, 2.0) -> (fadd X, X)
16973 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16974 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16975
16976 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16977 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16978 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16979 return DAG.getNode(ISD::FSUB, DL, VT,
16980 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16981 }
16982 }
16983
16984 // -N0 * -N1 --> N0 * N1
16985 TargetLowering::NegatibleCost CostN0 =
16986 TargetLowering::NegatibleCost::Expensive;
16987 TargetLowering::NegatibleCost CostN1 =
16988 TargetLowering::NegatibleCost::Expensive;
16989 SDValue NegN0 =
16990 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16991 if (NegN0) {
16992 HandleSDNode NegN0Handle(NegN0);
16993 SDValue NegN1 =
16994 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16995 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16996 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16997 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16998 }
16999 
16999
17000 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17001 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17002 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17003 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17004 TLI.isOperationLegal(ISD::FABS, VT)) {
17005 SDValue Select = N0, X = N1;
17006 if (Select.getOpcode() != ISD::SELECT)
17007 std::swap(Select, X);
17008
17009 SDValue Cond = Select.getOperand(0);
17010 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17011 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17012
17013 if (TrueOpnd && FalseOpnd &&
17014 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17015 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17016 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17017 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17018 switch (CC) {
17019 default: break;
17020 case ISD::SETOLT:
17021 case ISD::SETULT:
17022 case ISD::SETOLE:
17023 case ISD::SETULE:
17024 case ISD::SETLT:
17025 case ISD::SETLE:
17026 std::swap(TrueOpnd, FalseOpnd);
17027 [[fallthrough]];
17028 case ISD::SETOGT:
17029 case ISD::SETUGT:
17030 case ISD::SETOGE:
17031 case ISD::SETUGE:
17032 case ISD::SETGT:
17033 case ISD::SETGE:
17034 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17035 TLI.isOperationLegal(ISD::FNEG, VT))
17036 return DAG.getNode(ISD::FNEG, DL, VT,
17037 DAG.getNode(ISD::FABS, DL, VT, X));
17038 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17039 return DAG.getNode(ISD::FABS, DL, VT, X);
17040
17041 break;
17042 }
17043 }
17044 }
17045
17046 // FMUL -> FMA combines:
17047 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17048 AddToWorklist(Fused.getNode());
17049 return Fused;
17050 }
17051
17052 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17053 // able to run.
17054 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17055 return R;
17056
17057 return SDValue();
17058}
17059
17060template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17061 SDValue N0 = N->getOperand(0);
17062 SDValue N1 = N->getOperand(1);
17063 SDValue N2 = N->getOperand(2);
17064 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17065 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17066 EVT VT = N->getValueType(0);
17067 SDLoc DL(N);
17068 const TargetOptions &Options = DAG.getTarget().Options;
17069 // FMA nodes have flags that propagate to the created nodes.
17070 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17071 MatchContextClass matcher(DAG, TLI, N);
17072
17073 // Constant fold FMA.
17074 if (isa<ConstantFPSDNode>(N0) &&
17075 isa<ConstantFPSDNode>(N1) &&
17076 isa<ConstantFPSDNode>(N2)) {
17077 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
17078 }
17079
17080 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17081 TargetLowering::NegatibleCost CostN0 =
17082 TargetLowering::NegatibleCost::Expensive;
17083 TargetLowering::NegatibleCost CostN1 =
17084 TargetLowering::NegatibleCost::Expensive;
17085 SDValue NegN0 =
17086 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17087 if (NegN0) {
17088 HandleSDNode NegN0Handle(NegN0);
17089 SDValue NegN1 =
17090 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17091 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17092 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17093 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17094 }
17095
17096 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17097 if (Options.UnsafeFPMath) {
17098 if (N0CFP && N0CFP->isZero())
17099 return N2;
17100 if (N1CFP && N1CFP->isZero())
17101 return N2;
17102 }
17103
17104 // FIXME: Support splat of constant.
17105 if (N0CFP && N0CFP->isExactlyValue(1.0))
17106 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
17107 if (N1CFP && N1CFP->isExactlyValue(1.0))
17108 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
17109
17110 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17111 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17112 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17113 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
17114
17115 bool CanReassociate =
17116 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17117 if (CanReassociate) {
17118 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17119 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17120 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17121 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17122 return matcher.getNode(
17123 ISD::FMUL, DL, VT, N0,
17124 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17125 }
17126
17127 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17128 if (matcher.match(N0, ISD::FMUL) &&
17129 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17130 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17131 return matcher.getNode(
17132 ISD::FMA, DL, VT, N0.getOperand(0),
17133 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17134 }
17135 }
17136
17137 // (fma x, -1, y) -> (fadd (fneg x), y)
17138 // FIXME: Support splat of constant.
17139 if (N1CFP) {
17140 if (N1CFP->isExactlyValue(1.0))
17141 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17142
17143 if (N1CFP->isExactlyValue(-1.0) &&
17144 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17145 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17146 AddToWorklist(RHSNeg.getNode());
17147 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17148 }
17149
17150 // fma (fneg x), K, y -> fma x -K, y
17151 if (matcher.match(N0, ISD::FNEG) &&
17152 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17153 (N1.hasOneUse() &&
17154 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17155 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17156 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17157 }
17158 }
17159
17160 // FIXME: Support splat of constant.
17161 if (CanReassociate) {
17162 // (fma x, c, x) -> (fmul x, (c+1))
17163 if (N1CFP && N0 == N2) {
17164 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17165 matcher.getNode(ISD::FADD, DL, VT, N1,
17166 DAG.getConstantFP(1.0, DL, VT)));
17167 }
17168
17169 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17170 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17171 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17172 matcher.getNode(ISD::FADD, DL, VT, N1,
17173 DAG.getConstantFP(-1.0, DL, VT)));
17174 }
17175 }
17176
17177 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17178 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17179 if (!TLI.isFNegFree(VT))
17180 if (SDValue Neg = TLI.getCheaperNegatedExpression(
17181 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17182 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17183 return SDValue();
17184}
17185
17186SDValue DAGCombiner::visitFMAD(SDNode *N) {
17187 SDValue N0 = N->getOperand(0);
17188 SDValue N1 = N->getOperand(1);
17189 SDValue N2 = N->getOperand(2);
17190 EVT VT = N->getValueType(0);
17191 SDLoc DL(N);
17192
17193 // Constant fold FMAD.
17194 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17195 isa<ConstantFPSDNode>(N2))
17196 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17197
17198 return SDValue();
17199}
17200
17201// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17202// reciprocal.
17203// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17204// Notice that this is not always beneficial. One reason is different targets
17205// may have different costs for FDIV and FMUL, so sometimes the cost of two
17206// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17207// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17208SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17209 // TODO: Limit this transform based on optsize/minsize - it always creates at
17210 // least 1 extra instruction. But the perf win may be substantial enough
17211 // that only minsize should restrict this.
17212 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17213 const SDNodeFlags Flags = N->getFlags();
17214 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17215 return SDValue();
17216
17217 // Skip if current node is a reciprocal/fneg-reciprocal.
17218 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17219 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17220 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17221 return SDValue();
17222
17223 // Exit early if the target does not want this transform or if there can't
17224 // possibly be enough uses of the divisor to make the transform worthwhile.
17225 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17226
17227 // For splat vectors, scale the number of uses by the splat factor. If we can
17228 // convert the division into a scalar op, that will likely be much faster.
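// For example, a <4 x float> splat divisor with a single FDIV user counts as
// four scalar uses when compared against the target's minimum-use threshold.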
17229 unsigned NumElts = 1;
17230 EVT VT = N->getValueType(0);
17231 if (VT.isVector() && DAG.isSplatValue(N1))
17232 NumElts = VT.getVectorMinNumElements();
17233
17234 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17235 return SDValue();
17236
17237 // Find all FDIV users of the same divisor.
17238 // Use a set because duplicates may be present in the user list.
17239 SetVector<SDNode *> Users;
17240 for (auto *U : N1->uses()) {
17241 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17242 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17243 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17244 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17245 U->getFlags().hasAllowReassociation() &&
17246 U->getFlags().hasNoSignedZeros())
17247 continue;
17248
17249 // This division is eligible for optimization only if global unsafe math
17250 // is enabled or if this division allows reciprocal formation.
17251 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17252 Users.insert(U);
17253 }
17254 }
17255
17256 // Now that we have the actual number of divisor uses, make sure it meets
17257 // the minimum threshold specified by the target.
17258 if ((Users.size() * NumElts) < MinUses)
17259 return SDValue();
17260
17261 SDLoc DL(N);
17262 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17263 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17264
17265 // Dividend / Divisor -> Dividend * Reciprocal
17266 for (auto *U : Users) {
17267 SDValue Dividend = U->getOperand(0);
17268 if (Dividend != FPOne) {
17269 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17270 Reciprocal, Flags);
17271 CombineTo(U, NewNode);
17272 } else if (U != Reciprocal.getNode()) {
17273 // In the absence of fast-math-flags, this user node is always the
17274 // same node as Reciprocal, but with FMF they may be different nodes.
17275 CombineTo(U, Reciprocal);
17276 }
17277 }
17278 return SDValue(N, 0); // N was replaced.
17279}
17280
17281SDValue DAGCombiner::visitFDIV(SDNode *N) {
17282 SDValue N0 = N->getOperand(0);
17283 SDValue N1 = N->getOperand(1);
17284 EVT VT = N->getValueType(0);
17285 SDLoc DL(N);
17286 const TargetOptions &Options = DAG.getTarget().Options;
17287 SDNodeFlags Flags = N->getFlags();
17288 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17289
17290 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17291 return R;
17292
17293 // fold (fdiv c1, c2) -> c1/c2
17294 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17295 return C;
17296
17297 // fold vector ops
17298 if (VT.isVector())
17299 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17300 return FoldedVOp;
17301
17302 if (SDValue NewSel = foldBinOpIntoSelect(N))
17303 return NewSel;
17304
17305 if (SDValue V = combineRepeatedFPDivisors(N))
17306 return V;
17307
17308 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17309 // the loss is acceptable with AllowReciprocal.
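// For example, X / 4.0 -> X * 0.25 is exact (opOK), so it only needs the
// reciprocal to be a usable FP immediate; X / 3.0 -> X * (1.0/3.0) is inexact
// (opInexact) and additionally requires UnsafeFPMath or the arcp flag.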
17310 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17311 // Compute the reciprocal 1.0 / c2.
17312 const APFloat &N1APF = N1CFP->getValueAPF();
17313 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17314 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17315 // Only do the transform if the reciprocal is a legal fp immediate that
17316 // isn't too nasty (eg NaN, denormal, ...).
17317 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17318 (st == APFloat::opInexact &&
17319 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17320 (!LegalOperations ||
17321 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17322 // backend)... we should handle this gracefully after Legalize.
17323 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17324 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17325 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17326 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17327 DAG.getConstantFP(Recip, DL, VT));
17328 }
17329
17330 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17331 // If this FDIV is part of a reciprocal square root, it may be folded
17332 // into a target-specific square root estimate instruction.
17333 if (N1.getOpcode() == ISD::FSQRT) {
17334 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17335 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17336 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17337 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17338 if (SDValue RV =
17339 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17340 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17341 AddToWorklist(RV.getNode());
17342 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17343 }
17344 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17345 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17346 if (SDValue RV =
17347 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17348 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17349 AddToWorklist(RV.getNode());
17350 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17351 }
17352 } else if (N1.getOpcode() == ISD::FMUL) {
17353 // Look through an FMUL. Even though this won't remove the FDIV directly,
17354 // it's still worthwhile to get rid of the FSQRT if possible.
17355 SDValue Sqrt, Y;
17356 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17357 Sqrt = N1.getOperand(0);
17358 Y = N1.getOperand(1);
17359 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17360 Sqrt = N1.getOperand(1);
17361 Y = N1.getOperand(0);
17362 }
17363 if (Sqrt.getNode()) {
17364 // If the other multiply operand is known positive, pull it into the
17365 // sqrt. That will eliminate the division if we convert to an estimate.
17366 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17367 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17368 SDValue A;
17369 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17370 A = Y.getOperand(0);
17371 else if (Y == Sqrt.getOperand(0))
17372 A = Y;
17373 if (A) {
17374 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17375 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17376 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17377 SDValue AAZ =
17378 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17379 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17380 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17381
17382 // Estimate creation failed. Clean up speculatively created nodes.
17383 recursivelyDeleteUnusedNodes(AAZ.getNode());
17384 }
17385 }
17386
17387 // We found a FSQRT, so try to make this fold:
17388 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17389 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17390 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17391 AddToWorklist(Div.getNode());
17392 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17393 }
17394 }
17395 }
17396
17397 // Fold into a reciprocal estimate and multiply instead of a real divide.
17398 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17399 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17400 return RV;
17401 }
17402
17403 // Fold X/Sqrt(X) -> Sqrt(X)
17404 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17405 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17406 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17407 return N1;
17408
17409 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17410 TargetLowering::NegatibleCost CostN0 =
17411 TargetLowering::NegatibleCost::Expensive;
17412 TargetLowering::NegatibleCost CostN1 =
17413 TargetLowering::NegatibleCost::Expensive;
17414 SDValue NegN0 =
17415 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17416 if (NegN0) {
17417 HandleSDNode NegN0Handle(NegN0);
17418 SDValue NegN1 =
17419 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17420 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17421 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17422 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17423 }
17424
17425 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17426 return R;
17427
17428 return SDValue();
17429}
17430
17431SDValue DAGCombiner::visitFREM(SDNode *N) {
17432 SDValue N0 = N->getOperand(0);
17433 SDValue N1 = N->getOperand(1);
17434 EVT VT = N->getValueType(0);
17435 SDNodeFlags Flags = N->getFlags();
17436 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17437 SDLoc DL(N);
17438
17439 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17440 return R;
17441
17442 // fold (frem c1, c2) -> fmod(c1,c2)
17443 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17444 return C;
17445
17446 if (SDValue NewSel = foldBinOpIntoSelect(N))
17447 return NewSel;
17448
17449 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
17450 // power of 2.
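// For example, frem(7.5, 2.0): 7.5 / 2.0 = 3.75, trunc(3.75) = 3.0, and
// 7.5 - 3.0 * 2.0 = 1.5, matching fmod(7.5, 2.0).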
17451 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17455 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17456 bool NeedsCopySign =
17457 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17458 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17459 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17460 SDValue MLA;
17462 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17463 N1, N0);
17464 } else {
17465 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17466 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17467 }
17468 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17469 }
17470
17471 return SDValue();
17472}
17473
17474SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17475 SDNodeFlags Flags = N->getFlags();
17476 const TargetOptions &Options = DAG.getTarget().Options;
17477
17478 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17479 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17480 if (!Flags.hasApproximateFuncs() ||
17481 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17482 return SDValue();
17483
17484 SDValue N0 = N->getOperand(0);
17485 if (TLI.isFsqrtCheap(N0, DAG))
17486 return SDValue();
17487
17488 // FSQRT nodes have flags that propagate to the created nodes.
17489 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17490 // transform the fdiv, we may produce a sub-optimal estimate sequence
17491 // because the reciprocal calculation may not have to filter out a
17492 // 0.0 input.
17493 return buildSqrtEstimate(N0, Flags);
17494}
17495
17496/// copysign(x, fp_extend(y)) -> copysign(x, y)
17497/// copysign(x, fp_round(y)) -> copysign(x, y)
17498/// Operands to the functions are the type of X and Y respectively.
17499static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17500 // Always fold no-op FP casts.
17501 if (XTy == YTy)
17502 return true;
17503
17504 // Do not optimize out type conversion of f128 type yet.
17505 // For some targets like x86_64, configuration is changed to keep one f128
17506 // value in one SSE register, but instruction selection cannot handle
17507 // FCOPYSIGN on SSE registers yet.
17508 if (YTy == MVT::f128)
17509 return false;
17510
17512}
17513
17514 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17515 SDValue N1 = N->getOperand(1);
17516 if (N1.getOpcode() != ISD::FP_EXTEND &&
17517 N1.getOpcode() != ISD::FP_ROUND)
17518 return false;
17519 EVT N1VT = N1->getValueType(0);
17520 EVT N1Op0VT = N1->getOperand(0).getValueType();
17521 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17522}
17523
17524SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17525 SDValue N0 = N->getOperand(0);
17526 SDValue N1 = N->getOperand(1);
17527 EVT VT = N->getValueType(0);
17528
17529 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17530 if (SDValue C =
17531 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17532 return C;
17533
17534 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17535 const APFloat &V = N1C->getValueAPF();
17536 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17537 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17538 if (!V.isNegative()) {
17539 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17540 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17541 } else {
17542 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17543 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17544 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17545 }
17546 }
17547
17548 // copysign(fabs(x), y) -> copysign(x, y)
17549 // copysign(fneg(x), y) -> copysign(x, y)
17550 // copysign(copysign(x,z), y) -> copysign(x, y)
17551 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17552 N0.getOpcode() == ISD::FCOPYSIGN)
17553 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17554
17555 // copysign(x, abs(y)) -> abs(x)
17556 if (N1.getOpcode() == ISD::FABS)
17557 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17558
17559 // copysign(x, copysign(y,z)) -> copysign(x, z)
17560 if (N1.getOpcode() == ISD::FCOPYSIGN)
17561 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17562
17563 // copysign(x, fp_extend(y)) -> copysign(x, y)
17564 // copysign(x, fp_round(y)) -> copysign(x, y)
17565 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17566 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17567
17568 // We only take the sign bit from the sign operand.
17569 EVT SignVT = N1.getValueType();
17570 if (SimplifyDemandedBits(N1,
17571 APInt::getSignMask(SignVT.getScalarSizeInBits())))
17572 return SDValue(N, 0);
17573
17574 // We only take the non-sign bits from the value operand
17575 if (SimplifyDemandedBits(N0,
17576 APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
17577 return SDValue(N, 0);
17578
17579 return SDValue();
17580}
17581
17582SDValue DAGCombiner::visitFPOW(SDNode *N) {
17583 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17584 if (!ExponentC)
17585 return SDValue();
17586 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17587
17588 // Try to convert x ** (1/3) into cube root.
17589 // TODO: Handle the various flavors of long double.
17590 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17591 // Some range near 1/3 should be fine.
17592 EVT VT = N->getValueType(0);
17593 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17594 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17595 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17596 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17597 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
17598 // For regular numbers, rounding may cause the results to differ.
17599 // Therefore, we require { nsz ninf nnan afn } for this transform.
17600 // TODO: We could select out the special cases if we don't have nsz/ninf.
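// For example, with nnan ninf nsz afn set, powf(x, 1.0f/3.0f) becomes a single
// FCBRT node, which typically lowers to a cbrtf() libcall when the target has
// no native cube-root instruction.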
17601 SDNodeFlags Flags = N->getFlags();
17602 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17603 !Flags.hasApproximateFuncs())
17604 return SDValue();
17605
17606 // Do not create a cbrt() libcall if the target does not have it, and do not
17607 // turn a pow that has lowering support into a cbrt() libcall.
17608 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17609 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17610 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17611 return SDValue();
17612
17613 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17614 }
17615
17616 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17617 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17618 // TODO: This could be extended (using a target hook) to handle smaller
17619 // power-of-2 fractional exponents.
17620 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17621 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17622 if (ExponentIs025 || ExponentIs075) {
17623 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17624 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17625 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17626 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17627 // For regular numbers, rounding may cause the results to differ.
17628 // Therefore, we require { nsz ninf afn } for this transform.
17629 // TODO: We could select out the special cases if we don't have nsz/ninf.
17630 SDNodeFlags Flags = N->getFlags();
17631
17632 // We only need no signed zeros for the 0.25 case.
17633 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17634 !Flags.hasApproximateFuncs())
17635 return SDValue();
17636
17637 // Don't double the number of libcalls. We are trying to inline fast code.
17638 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17639 return SDValue();
17640
17641 // Assume that libcalls are the smallest code.
17642 // TODO: This restriction should probably be lifted for vectors.
17643 if (ForCodeSize)
17644 return SDValue();
17645
17646 // pow(X, 0.25) --> sqrt(sqrt(X))
17647 SDLoc DL(N);
17648 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17649 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17650 if (ExponentIs025)
17651 return SqrtSqrt;
17652 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17653 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17654 }
17655
17656 return SDValue();
17657}
17658
17659 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17660 const TargetLowering &TLI) {
17661 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17662 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17663 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17664 // conversions would return +0.0.
17665 // FIXME: We should be able to use node-level FMF here.
17666 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17667 EVT VT = N->getValueType(0);
17668 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17669 !DAG.getTarget().Options.NoSignedZerosFPMath)
17670 return SDValue();
17671
17672 // fptosi/fptoui round towards zero, so converting from FP to integer and
17673 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17674 SDValue N0 = N->getOperand(0);
17675 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17676 N0.getOperand(0).getValueType() == VT)
17677 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17678
17679 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17680 N0.getOperand(0).getValueType() == VT)
17681 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17682
17683 return SDValue();
17684}
17685
17686SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17687 SDValue N0 = N->getOperand(0);
17688 EVT VT = N->getValueType(0);
17689 EVT OpVT = N0.getValueType();
17690
17691 // [us]itofp(undef) = 0, because the result value is bounded.
17692 if (N0.isUndef())
17693 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17694
17695 // fold (sint_to_fp c1) -> c1fp
17696 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17697 // ...but only if the target supports immediate floating-point values
17698 (!LegalOperations ||
17699 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17700 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17701
17702 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17703 // but UINT_TO_FP is legal on this target, try to convert.
17704 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17705 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17706 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17707 if (DAG.SignBitIsZero(N0))
17708 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17709 }
17710
17711 // The next optimizations are desirable only if SELECT_CC can be lowered.
17712 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17713 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17714 !VT.isVector() &&
17715 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17716 SDLoc DL(N);
17717 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17718 DAG.getConstantFP(0.0, DL, VT));
17719 }
17720
17721 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17722 // (select (setcc x, y, cc), 1.0, 0.0)
17723 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17724 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17725 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17726 SDLoc DL(N);
17727 return DAG.getSelect(DL, VT, N0.getOperand(0),
17728 DAG.getConstantFP(1.0, DL, VT),
17729 DAG.getConstantFP(0.0, DL, VT));
17730 }
17731
17732 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17733 return FTrunc;
17734
17735 return SDValue();
17736}
17737
17738SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17739 SDValue N0 = N->getOperand(0);
17740 EVT VT = N->getValueType(0);
17741 EVT OpVT = N0.getValueType();
17742
17743 // [us]itofp(undef) = 0, because the result value is bounded.
17744 if (N0.isUndef())
17745 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17746
17747 // fold (uint_to_fp c1) -> c1fp
17748 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17749 // ...but only if the target supports immediate floating-point values
17750 (!LegalOperations ||
17751 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17752 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17753
17754 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17755 // but SINT_TO_FP is legal on this target, try to convert.
17756 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17757 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17758 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17759 if (DAG.SignBitIsZero(N0))
17760 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17761 }
17762
17763 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17764 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17765 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17766 SDLoc DL(N);
17767 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17768 DAG.getConstantFP(0.0, DL, VT));
17769 }
17770
17771 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17772 return FTrunc;
17773
17774 return SDValue();
17775}
17776
17777 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17778 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17779 SDValue N0 = N->getOperand(0);
17780 EVT VT = N->getValueType(0);
17781
17782 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17783 return SDValue();
17784
17785 SDValue Src = N0.getOperand(0);
17786 EVT SrcVT = Src.getValueType();
17787 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17788 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17789
17790 // We can safely assume the conversion won't overflow the output range,
17791 // because (for example) (uint8_t)18293.f is undefined behavior.
17792
17793 // Since we can assume the conversion won't overflow, our decision as to
17794 // whether the input will fit in the float should depend on the minimum
17795 // of the input range and output range.
17796
17797 // This means this is also safe for a signed input and unsigned output, since
17798 // a negative input would lead to undefined behavior.
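// For example, (fp_to_sint i32 (sint_to_fp f32, i16 x)): f32 carries 24 bits of
// precision, enough to represent every i16 exactly, so the round trip reduces
// to a plain sign-extension of x to i32.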
17799 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17800 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17801 unsigned ActualSize = std::min(InputSize, OutputSize);
17802 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17803
17804 // We can only fold away the float conversion if the input range can be
17805 // represented exactly in the float range.
17806 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17807 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17808 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17809 : ISD::ZERO_EXTEND;
17810 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17811 }
17812 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17813 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17814 return DAG.getBitcast(VT, Src);
17815 }
17816 return SDValue();
17817}
17818
17819SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17820 SDValue N0 = N->getOperand(0);
17821 EVT VT = N->getValueType(0);
17822
17823 // fold (fp_to_sint undef) -> undef
17824 if (N0.isUndef())
17825 return DAG.getUNDEF(VT);
17826
17827 // fold (fp_to_sint c1fp) -> c1
17828 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17829 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17830
17831 return FoldIntToFPToInt(N, DAG);
17832}
17833
17834SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17835 SDValue N0 = N->getOperand(0);
17836 EVT VT = N->getValueType(0);
17837
17838 // fold (fp_to_uint undef) -> undef
17839 if (N0.isUndef())
17840 return DAG.getUNDEF(VT);
17841
17842 // fold (fp_to_uint c1fp) -> c1
17843 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17844 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17845
17846 return FoldIntToFPToInt(N, DAG);
17847}
17848
17849SDValue DAGCombiner::visitXRINT(SDNode *N) {
17850 SDValue N0 = N->getOperand(0);
17851 EVT VT = N->getValueType(0);
17852
17853 // fold (lrint|llrint undef) -> undef
17854 if (N0.isUndef())
17855 return DAG.getUNDEF(VT);
17856
17857 // fold (lrint|llrint c1fp) -> c1
17858 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17859 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17860
17861 return SDValue();
17862}
17863
17864SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17865 SDValue N0 = N->getOperand(0);
17866 SDValue N1 = N->getOperand(1);
17867 EVT VT = N->getValueType(0);
17868
17869 // fold (fp_round c1fp) -> c1fp
17870 if (SDValue C =
17871 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17872 return C;
17873
17874 // fold (fp_round (fp_extend x)) -> x
17875 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17876 return N0.getOperand(0);
17877
17878 // fold (fp_round (fp_round x)) -> (fp_round x)
17879 if (N0.getOpcode() == ISD::FP_ROUND) {
17880 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17881 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17882
17883 // Avoid folding legal fp_rounds into non-legal ones.
17884 if (!hasOperation(ISD::FP_ROUND, VT))
17885 return SDValue();
17886
17887 // Skip this folding if it results in an fp_round from f80 to f16.
17888 //
17889 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17890 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17891 // instructions from f32 or f64. Moreover, the first (value-preserving)
17892 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17893 // x86.
17894 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17895 return SDValue();
17896
17897 // If the first fp_round isn't a value preserving truncation, it might
17898 // introduce a tie in the second fp_round, that wouldn't occur in the
17899 // single-step fp_round we want to fold to.
17900 // In other words, double rounding isn't the same as rounding.
17901 // Also, this is a value preserving truncation iff both fp_round's are.
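// For example, rounding an f64 value to f32 can land exactly on an f16 tie
// boundary; rounding that f32 to f16 may then break the tie differently than a
// single direct f64-to-f16 rounding would.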
17902 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17903 SDLoc DL(N);
17904 return DAG.getNode(
17905 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17906 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17907 }
17908 }
17909
17910 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17911 // Note: From a legality perspective, this is a two step transform. First,
17912 // we duplicate the fp_round to the arguments of the copysign, then we
17913 // eliminate the fp_round on Y. The second step requires an additional
17914 // predicate to match the implementation above.
17915 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17916 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17917 N0.getValueType())) {
17918 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17919 N0.getOperand(0), N1);
17920 AddToWorklist(Tmp.getNode());
17921 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17922 Tmp, N0.getOperand(1));
17923 }
17924
17925 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17926 return NewVSel;
17927
17928 return SDValue();
17929}
17930
17931SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17932 SDValue N0 = N->getOperand(0);
17933 EVT VT = N->getValueType(0);
17934
17935 if (VT.isVector())
17936 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17937 return FoldedVOp;
17938
17939 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17940 if (N->hasOneUse() &&
17941 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17942 return SDValue();
17943
17944 // fold (fp_extend c1fp) -> c1fp
17945 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17946 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17947
17948 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17949 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17950 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17951 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17952
17953 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17954 // value of X.
17955 if (N0.getOpcode() == ISD::FP_ROUND
17956 && N0.getConstantOperandVal(1) == 1) {
17957 SDValue In = N0.getOperand(0);
17958 if (In.getValueType() == VT) return In;
17959 if (VT.bitsLT(In.getValueType()))
17960 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17961 In, N0.getOperand(1));
17962 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17963 }
17964
17965 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17966 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17967 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17968 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17969 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17970 LN0->getChain(),
17971 LN0->getBasePtr(), N0.getValueType(),
17972 LN0->getMemOperand());
17973 CombineTo(N, ExtLoad);
17974 CombineTo(
17975 N0.getNode(),
17976 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17977 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17978 ExtLoad.getValue(1));
17979 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17980 }
17981
17982 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17983 return NewVSel;
17984
17985 return SDValue();
17986}
17987
17988SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17989 SDValue N0 = N->getOperand(0);
17990 EVT VT = N->getValueType(0);
17991
17992 // fold (fceil c1) -> fceil(c1)
17993 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17994 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17995
17996 return SDValue();
17997}
17998
17999SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18000 SDValue N0 = N->getOperand(0);
18001 EVT VT = N->getValueType(0);
18002
18003 // fold (ftrunc c1) -> ftrunc(c1)
18004 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18005 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
18006
18007 // fold ftrunc (known rounded int x) -> x
18008 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
18009 // likely to be generated to extract integer from a rounded floating value.
18010 switch (N0.getOpcode()) {
18011 default: break;
18012 case ISD::FRINT:
18013 case ISD::FTRUNC:
18014 case ISD::FNEARBYINT:
18015 case ISD::FROUNDEVEN:
18016 case ISD::FFLOOR:
18017 case ISD::FCEIL:
18018 return N0;
18019 }
18020
18021 return SDValue();
18022}
18023
18024SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18025 SDValue N0 = N->getOperand(0);
18026
18027 // fold (ffrexp c1) -> ffrexp(c1)
18028 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18029 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18030 return SDValue();
18031}
18032
18033SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18034 SDValue N0 = N->getOperand(0);
18035 EVT VT = N->getValueType(0);
18036
18037 // fold (ffloor c1) -> ffloor(c1)
18038 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18039 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
18040
18041 return SDValue();
18042}
18043
18044SDValue DAGCombiner::visitFNEG(SDNode *N) {
18045 SDValue N0 = N->getOperand(0);
18046 EVT VT = N->getValueType(0);
18047 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18048
18049 // Constant fold FNEG.
18050 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18051 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
18052
18053 if (SDValue NegN0 =
18054 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18055 return NegN0;
18056
18057 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18058 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18059 // know it was called from a context with a nsz flag if the input fsub does
18060 // not.
18061 if (N0.getOpcode() == ISD::FSUB &&
18062 (DAG.getTarget().Options.NoSignedZerosFPMath ||
18063 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18064 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18065 N0.getOperand(0));
18066 }
18067
18068 if (SDValue Cast = foldSignChangeInBitcast(N))
18069 return Cast;
18070
18071 return SDValue();
18072}
18073
18074SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18075 SDValue N0 = N->getOperand(0);
18076 SDValue N1 = N->getOperand(1);
18077 EVT VT = N->getValueType(0);
18078 const SDNodeFlags Flags = N->getFlags();
18079 unsigned Opc = N->getOpcode();
18080 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18081 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18082 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18083
18084 // Constant fold.
18085 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18086 return C;
18087
18088 // Canonicalize to constant on RHS.
18089 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18090 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18091 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18092
18093 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18094 const APFloat &AF = N1CFP->getValueAPF();
18095
18096 // minnum(X, nan) -> X
18097 // maxnum(X, nan) -> X
18098 // minimum(X, nan) -> nan
18099 // maximum(X, nan) -> nan
18100 if (AF.isNaN())
18101 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18102
18103 // In the following folds, inf can be replaced with the largest finite
18104 // float, if the ninf flag is set.
18105 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18106 // minnum(X, -inf) -> -inf
18107 // maxnum(X, +inf) -> +inf
18108 // minimum(X, -inf) -> -inf if nnan
18109 // maximum(X, +inf) -> +inf if nnan
18110 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18111 return N->getOperand(1);
18112
18113 // minnum(X, +inf) -> X if nnan
18114 // maxnum(X, -inf) -> X if nnan
18115 // minimum(X, +inf) -> X
18116 // maximum(X, -inf) -> X
18117 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18118 return N->getOperand(0);
18119 }
18120 }
18121
18122 if (SDValue SD = reassociateReduction(
18123 PropagatesNaN
18124              ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18125              : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18126          Opc, SDLoc(N), VT, N0, N1, Flags))
18127 return SD;
18128
18129 return SDValue();
18130}
18131
18132SDValue DAGCombiner::visitFABS(SDNode *N) {
18133 SDValue N0 = N->getOperand(0);
18134 EVT VT = N->getValueType(0);
18135
18136 // fold (fabs c1) -> fabs(c1)
18137  if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18138    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
18139
18140 // fold (fabs (fabs x)) -> (fabs x)
18141 if (N0.getOpcode() == ISD::FABS)
18142 return N->getOperand(0);
18143
18144 // fold (fabs (fneg x)) -> (fabs x)
18145 // fold (fabs (fcopysign x, y)) -> (fabs x)
18146 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18147 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
18148
18149 if (SDValue Cast = foldSignChangeInBitcast(N))
18150 return Cast;
18151
18152 return SDValue();
18153}
18154
18155SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18156 SDValue Chain = N->getOperand(0);
18157 SDValue N1 = N->getOperand(1);
18158 SDValue N2 = N->getOperand(2);
18159
18160 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18161 // nondeterministic jumps).
18162 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18163 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18164 N1->getOperand(0), N2);
18165 }
18166
18167 // Variant of the previous fold where there is a SETCC in between:
18168 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18169 // =>
18170 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18171 // =>
18172 // BRCOND(SETCC(X, CONST, Cond))
18173 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18174 // isn't equivalent to true or false.
18175 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18176 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18177 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18178 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18179 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18180 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18181 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18182 bool Updated = false;
18183
18184 // Is 'X Cond C' always true or false?
18185 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18186 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18187 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18188 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18189 (Cond == ISD::SETGT && C->isMaxSignedValue());
18190 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18191 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18192 (Cond == ISD::SETUGE && C->isZero()) ||
18193 (Cond == ISD::SETGE && C->isMinSignedValue());
18194 return True || False;
18195 };
18196
18197 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18198 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18199 S0 = S0->getOperand(0);
18200 Updated = true;
18201 }
18202 }
18203 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18204 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18205 S1 = S1->getOperand(0);
18206 Updated = true;
18207 }
18208 }
18209
18210 if (Updated)
18211 return DAG.getNode(
18212 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18213 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
18214 }
18215
18216 // If N is a constant we could fold this into a fallthrough or unconditional
18217 // branch. However that doesn't happen very often in normal code, because
18218 // Instcombine/SimplifyCFG should have handled the available opportunities.
18219 // If we did this folding here, it would be necessary to update the
18220 // MachineBasicBlock CFG, which is awkward.
18221
18222 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18223 // on the target.
18224 if (N1.getOpcode() == ISD::SETCC &&
18225      TLI.isOperationLegalOrCustom(ISD::BR_CC,
18226                           N1.getOperand(0).getValueType())) {
18227 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18228 Chain, N1.getOperand(2),
18229 N1.getOperand(0), N1.getOperand(1), N2);
18230 }
18231
18232 if (N1.hasOneUse()) {
18233 // rebuildSetCC calls visitXor which may change the Chain when there is a
18234 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18235 HandleSDNode ChainHandle(Chain);
18236 if (SDValue NewN1 = rebuildSetCC(N1))
18237 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18238 ChainHandle.getValue(), NewN1, N2);
18239 }
18240
18241 return SDValue();
18242}
18243
18244SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18245 if (N.getOpcode() == ISD::SRL ||
18246 (N.getOpcode() == ISD::TRUNCATE &&
18247 (N.getOperand(0).hasOneUse() &&
18248 N.getOperand(0).getOpcode() == ISD::SRL))) {
18249    // Look past the truncate.
18250 if (N.getOpcode() == ISD::TRUNCATE)
18251 N = N.getOperand(0);
18252
18253 // Match this pattern so that we can generate simpler code:
18254 //
18255 // %a = ...
18256 // %b = and i32 %a, 2
18257 // %c = srl i32 %b, 1
18258 // brcond i32 %c ...
18259 //
18260 // into
18261 //
18262 // %a = ...
18263 // %b = and i32 %a, 2
18264 // %c = setcc eq %b, 0
18265 // brcond %c ...
18266 //
18267 // This applies only when the AND constant value has one bit set and the
18268 // SRL constant is equal to the log2 of the AND constant. The back-end is
18269 // smart enough to convert the result into a TEST/JMP sequence.
18270 SDValue Op0 = N.getOperand(0);
18271 SDValue Op1 = N.getOperand(1);
18272
18273 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18274 SDValue AndOp1 = Op0.getOperand(1);
18275
18276 if (AndOp1.getOpcode() == ISD::Constant) {
18277 const APInt &AndConst = AndOp1->getAsAPIntVal();
18278
18279 if (AndConst.isPowerOf2() &&
18280 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18281 SDLoc DL(N);
18282 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18283 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18284 ISD::SETNE);
18285 }
18286 }
18287 }
18288 }
18289
18290 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18291 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18292 if (N.getOpcode() == ISD::XOR) {
18293 // Because we may call this on a speculatively constructed
18294 // SimplifiedSetCC Node, we need to simplify this node first.
18295 // Ideally this should be folded into SimplifySetCC and not
18296 // here. For now, grab a handle to N so we don't lose it from
18297    // replacements internal to the visit.
18298 HandleSDNode XORHandle(N);
18299 while (N.getOpcode() == ISD::XOR) {
18300 SDValue Tmp = visitXOR(N.getNode());
18301 // No simplification done.
18302 if (!Tmp.getNode())
18303 break;
18304      // Returning N is a form of in-visit replacement that may invalidate
18305      // N, so grab the value from the handle.
18306 if (Tmp.getNode() == N.getNode())
18307 N = XORHandle.getValue();
18308 else // Node simplified. Try simplifying again.
18309 N = Tmp;
18310 }
18311
18312 if (N.getOpcode() != ISD::XOR)
18313 return N;
18314
18315 SDValue Op0 = N->getOperand(0);
18316 SDValue Op1 = N->getOperand(1);
18317
18318 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18319 bool Equal = false;
18320 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18321 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18322 Op0.getValueType() == MVT::i1) {
18323 N = Op0;
18324 Op0 = N->getOperand(0);
18325 Op1 = N->getOperand(1);
18326 Equal = true;
18327 }
18328
18329 EVT SetCCVT = N.getValueType();
18330 if (LegalTypes)
18331 SetCCVT = getSetCCResultType(SetCCVT);
18332 // Replace the uses of XOR with SETCC
18333 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18334 Equal ? ISD::SETEQ : ISD::SETNE);
18335 }
18336 }
18337
18338 return SDValue();
18339}
18340
18341// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18342//
18343SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18344 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18345 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18346
18347 // If N is a constant we could fold this into a fallthrough or unconditional
18348 // branch. However that doesn't happen very often in normal code, because
18349 // Instcombine/SimplifyCFG should have handled the available opportunities.
18350 // If we did this folding here, it would be necessary to update the
18351 // MachineBasicBlock CFG, which is awkward.
18352
18353 // Use SimplifySetCC to simplify SETCC's.
18354  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18355                               CondLHS, CondRHS, CC->get(), SDLoc(N),
18356 false);
18357 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18358
18359 // fold to a simpler setcc
18360 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18361 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18362 N->getOperand(0), Simp.getOperand(2),
18363 Simp.getOperand(0), Simp.getOperand(1),
18364 N->getOperand(4));
18365
18366 return SDValue();
18367}
18368
18369static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18370 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18371 const TargetLowering &TLI) {
18372 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18373 if (LD->isIndexed())
18374 return false;
18375 EVT VT = LD->getMemoryVT();
18376 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18377 return false;
18378 Ptr = LD->getBasePtr();
18379 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18380 if (ST->isIndexed())
18381 return false;
18382 EVT VT = ST->getMemoryVT();
18383 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18384 return false;
18385 Ptr = ST->getBasePtr();
18386 IsLoad = false;
18387 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18388 if (LD->isIndexed())
18389 return false;
18390 EVT VT = LD->getMemoryVT();
18391 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18392 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18393 return false;
18394 Ptr = LD->getBasePtr();
18395 IsMasked = true;
18396 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18397 if (ST->isIndexed())
18398 return false;
18399 EVT VT = ST->getMemoryVT();
18400 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18401 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18402 return false;
18403 Ptr = ST->getBasePtr();
18404 IsLoad = false;
18405 IsMasked = true;
18406 } else {
18407 return false;
18408 }
18409 return true;
18410}
18411
18412/// Try turning a load/store into a pre-indexed load/store when the base
18413/// pointer is an add or subtract and it has other uses besides the load/store.
18414/// After the transformation, the new indexed load/store has effectively folded
18415/// the add/subtract in and all of its other uses are redirected to the
18416/// new load/store.
18417bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18418 if (Level < AfterLegalizeDAG)
18419 return false;
18420
18421 bool IsLoad = true;
18422 bool IsMasked = false;
18423 SDValue Ptr;
18424 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18425 Ptr, TLI))
18426 return false;
18427
18428 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18429 // out. There is no reason to make this a preinc/predec.
18430 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18431 Ptr->hasOneUse())
18432 return false;
18433
18434 // Ask the target to do addressing mode selection.
18435  SDValue BasePtr;
18436  SDValue Offset;
18437  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18438  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18439 return false;
18440
18441 // Backends without true r+i pre-indexed forms may need to pass a
18442 // constant base with a variable offset so that constant coercion
18443 // will work with the patterns in canonical form.
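  // For example, the target may return a constant BasePtr with the register
  // part in Offset; swapping the two below lets the remaining checks treat
  // the register as the base.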
18444 bool Swapped = false;
18445 if (isa<ConstantSDNode>(BasePtr)) {
18446 std::swap(BasePtr, Offset);
18447 Swapped = true;
18448 }
18449
18450  // Don't create an indexed load / store with zero offset.
18451  if (isNullConstant(Offset))
18452    return false;
18453
18454 // Try turning it into a pre-indexed load / store except when:
18455 // 1) The new base ptr is a frame index.
18456 // 2) If N is a store and the new base ptr is either the same as or is a
18457 // predecessor of the value being stored.
18458 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18459 // that would create a cycle.
18460 // 4) All uses are load / store ops that use it as old base ptr.
18461
18462 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18463 // (plus the implicit offset) to a register to preinc anyway.
18464 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18465 return false;
18466
18467 // Check #2.
18468 if (!IsLoad) {
18469 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18470 : cast<StoreSDNode>(N)->getValue();
18471
18472 // Would require a copy.
18473 if (Val == BasePtr)
18474 return false;
18475
18476 // Would create a cycle.
18477 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18478 return false;
18479 }
18480
18481 // Caches for hasPredecessorHelper.
18482  SmallPtrSet<const SDNode *, 32> Visited;
18483  SmallVector<const SDNode *, 16> Worklist;
18484  Worklist.push_back(N);
18485
18486 // If the offset is a constant, there may be other adds of constants that
18487 // can be folded with this one. We should do this to avoid having to keep
18488 // a copy of the original base pointer.
18489 SmallVector<SDNode *, 16> OtherUses;
18490 constexpr unsigned int MaxSteps = 8192;
18491 if (isa<ConstantSDNode>(Offset))
18492 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18493 UE = BasePtr->use_end();
18494 UI != UE; ++UI) {
18495 SDUse &Use = UI.getUse();
18496 // Skip the use that is Ptr and uses of other results from BasePtr's
18497 // node (important for nodes that return multiple results).
18498 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18499 continue;
18500
18501 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18502 MaxSteps))
18503 continue;
18504
18505 if (Use.getUser()->getOpcode() != ISD::ADD &&
18506 Use.getUser()->getOpcode() != ISD::SUB) {
18507 OtherUses.clear();
18508 break;
18509 }
18510
18511 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18512 if (!isa<ConstantSDNode>(Op1)) {
18513 OtherUses.clear();
18514 break;
18515 }
18516
18517 // FIXME: In some cases, we can be smarter about this.
18518 if (Op1.getValueType() != Offset.getValueType()) {
18519 OtherUses.clear();
18520 break;
18521 }
18522
18523 OtherUses.push_back(Use.getUser());
18524 }
18525
18526 if (Swapped)
18527 std::swap(BasePtr, Offset);
18528
18529 // Now check for #3 and #4.
18530 bool RealUse = false;
18531
18532 for (SDNode *Use : Ptr->uses()) {
18533 if (Use == N)
18534 continue;
18535 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18536 return false;
18537
18538 // If Ptr may be folded in addressing mode of other use, then it's
18539 // not profitable to do this transformation.
18540 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18541 RealUse = true;
18542 }
18543
18544 if (!RealUse)
18545 return false;
18546
18547  SDValue Result;
18548  if (!IsMasked) {
18549 if (IsLoad)
18550 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18551 else
18552 Result =
18553 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18554 } else {
18555 if (IsLoad)
18556 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18557 Offset, AM);
18558 else
18559 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18560 Offset, AM);
18561 }
18562 ++PreIndexedNodes;
18563 ++NodesCombined;
18564 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18565 Result.dump(&DAG); dbgs() << '\n');
18566 WorklistRemover DeadNodes(*this);
18567 if (IsLoad) {
18568 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18569 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18570 } else {
18571 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18572 }
18573
18574 // Finally, since the node is now dead, remove it from the graph.
18575 deleteAndRecombine(N);
18576
18577 if (Swapped)
18578 std::swap(BasePtr, Offset);
18579
18580 // Replace other uses of BasePtr that can be updated to use Ptr
18581 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18582 unsigned OffsetIdx = 1;
18583 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18584 OffsetIdx = 0;
18585 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18586 BasePtr.getNode() && "Expected BasePtr operand");
18587
18588 // We need to replace ptr0 in the following expression:
18589 // x0 * offset0 + y0 * ptr0 = t0
18590 // knowing that
18591 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18592 //
18593 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18594 // indexed load/store and the expression that needs to be re-written.
18595 //
18596 // Therefore, we have:
18597 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
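  // For instance, with a PRE_INC access folding (ptr0 + 4), i.e. x1 = y1 = 1
  // and offset1 = 4, and another user computing (ptr0 + 12), i.e. x0 = y0 = 1
  // and offset0 = 12, this yields t0 = (12 - 4) + t1 = t1 + 8.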
18598
18599 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18600 const APInt &Offset0 = CN->getAPIntValue();
18601 const APInt &Offset1 = Offset->getAsAPIntVal();
18602 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18603 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18604 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18605 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18606
18607 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18608
18609 APInt CNV = Offset0;
18610 if (X0 < 0) CNV = -CNV;
18611 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18612 else CNV = CNV - Offset1;
18613
18614 SDLoc DL(OtherUses[i]);
18615
18616 // We can now generate the new expression.
18617 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18618 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18619
18620 SDValue NewUse = DAG.getNode(Opcode,
18621 DL,
18622 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18623 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18624 deleteAndRecombine(OtherUses[i]);
18625 }
18626
18627 // Replace the uses of Ptr with uses of the updated base value.
18628 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18629 deleteAndRecombine(Ptr.getNode());
18630 AddToWorklist(Result.getNode());
18631
18632 return true;
18633}
18634
18635static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18636                                    SDValue &BasePtr, SDValue &Offset,
18637                                    ISD::MemIndexedMode &AM,
18638                                    SelectionDAG &DAG,
18639                                    const TargetLowering &TLI) {
18640 if (PtrUse == N ||
18641 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18642 return false;
18643
18644 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18645 return false;
18646
18647  // Don't create an indexed load / store with zero offset.
18648  if (isNullConstant(Offset))
18649    return false;
18650
18651 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18652 return false;
18653
18654  SmallPtrSet<const SDNode *, 32> Visited;
18655  for (SDNode *Use : BasePtr->uses()) {
18656 if (Use == Ptr.getNode())
18657 continue;
18658
18659    // Say no if there's a later user which could perform the index instead.
18660 if (isa<MemSDNode>(Use)) {
18661 bool IsLoad = true;
18662 bool IsMasked = false;
18663 SDValue OtherPtr;
18664      if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18665                                   IsMasked, OtherPtr, TLI)) {
18666        SmallVector<const SDNode *, 2> Worklist;
18667        Worklist.push_back(Use);
18668 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18669 return false;
18670 }
18671 }
18672
18673 // If all the uses are load / store addresses, then don't do the
18674 // transformation.
18675 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18676 for (SDNode *UseUse : Use->uses())
18677 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18678 return false;
18679 }
18680 }
18681 return true;
18682}
18683
18684static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18685                                          bool &IsMasked, SDValue &Ptr,
18686                                          SDValue &BasePtr, SDValue &Offset,
18687                                          ISD::MemIndexedMode &AM,
18688                                          SelectionDAG &DAG,
18689                                          const TargetLowering &TLI) {
18690  if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18691                                IsMasked, Ptr, TLI) ||
18692 Ptr->hasOneUse())
18693 return nullptr;
18694
18695 // Try turning it into a post-indexed load / store except when
18696 // 1) All uses are load / store ops that use it as base ptr (and
18697  //    it may be folded as addressing mode).
18698 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18699 // nor a successor of N. Otherwise, if Op is folded that would
18700 // create a cycle.
18701 for (SDNode *Op : Ptr->uses()) {
18702 // Check for #1.
18703 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18704 continue;
18705
18706 // Check for #2.
18707    SmallPtrSet<const SDNode *, 32> Visited;
18708    SmallVector<const SDNode *, 8> Worklist;
18709    constexpr unsigned int MaxSteps = 8192;
18710 // Ptr is predecessor to both N and Op.
18711 Visited.insert(Ptr.getNode());
18712 Worklist.push_back(N);
18713 Worklist.push_back(Op);
18714 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18715 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18716 return Op;
18717 }
18718 return nullptr;
18719}
18720
18721/// Try to combine a load/store with an add/sub of the base pointer node into a
18722/// post-indexed load/store. The transformation effectively folds the add/subtract
18723/// into the new indexed load/store, and all of its other uses are redirected to
18724/// the new load/store.
18725bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18726 if (Level < AfterLegalizeDAG)
18727 return false;
18728
18729 bool IsLoad = true;
18730 bool IsMasked = false;
18731 SDValue Ptr;
18732  SDValue BasePtr;
18733  SDValue Offset;
18734  ISD::MemIndexedMode AM = ISD::UNINDEXED;
18735  SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18736 Offset, AM, DAG, TLI);
18737 if (!Op)
18738 return false;
18739
18740  SDValue Result;
18741  if (!IsMasked)
18742 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18743 Offset, AM)
18744 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18745 BasePtr, Offset, AM);
18746 else
18747 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18748 BasePtr, Offset, AM)
18749                    : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18750                                                 BasePtr, Offset, AM);
18751 ++PostIndexedNodes;
18752 ++NodesCombined;
18753 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18754 Result.dump(&DAG); dbgs() << '\n');
18755 WorklistRemover DeadNodes(*this);
18756 if (IsLoad) {
18757 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18758 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18759 } else {
18760 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18761 }
18762
18763 // Finally, since the node is now dead, remove it from the graph.
18764 deleteAndRecombine(N);
18765
18766  // Replace the uses of Op with uses of the updated base value.
18767  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18768                                Result.getValue(IsLoad ? 1 : 0));
18769 deleteAndRecombine(Op);
18770 return true;
18771}
18772
18773/// Return the base-pointer arithmetic from an indexed \p LD.
18774SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18775 ISD::MemIndexedMode AM = LD->getAddressingMode();
18776 assert(AM != ISD::UNINDEXED);
18777 SDValue BP = LD->getOperand(1);
18778 SDValue Inc = LD->getOperand(2);
18779
18780 // Some backends use TargetConstants for load offsets, but don't expect
18781 // TargetConstants in general ADD nodes. We can convert these constants into
18782 // regular Constants (if the constant is not opaque).
18783  assert((Inc.getOpcode() != ISD::TargetConstant ||
18784          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18785 "Cannot split out indexing using opaque target constants");
18786 if (Inc.getOpcode() == ISD::TargetConstant) {
18787 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18788 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18789 ConstInc->getValueType(0));
18790 }
18791
18792 unsigned Opc =
18793 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18794 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18795}
18796
18797static inline ElementCount numVectorEltsOrZero(EVT T) {
18798  return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18799}
18800
18801bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18802 EVT STType = Val.getValueType();
18803 EVT STMemType = ST->getMemoryVT();
18804 if (STType == STMemType)
18805 return true;
18806 if (isTypeLegal(STMemType))
18807 return false; // fail.
18808 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18809 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18810 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18811 return true;
18812 }
18813 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18814 STType.isInteger() && STMemType.isInteger()) {
18815 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18816 return true;
18817 }
18818 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18819 Val = DAG.getBitcast(STMemType, Val);
18820 return true;
18821 }
18822 return false; // fail.
18823}
18824
18825bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18826 EVT LDMemType = LD->getMemoryVT();
18827 EVT LDType = LD->getValueType(0);
18828 assert(Val.getValueType() == LDMemType &&
18829 "Attempting to extend value of non-matching type");
18830 if (LDType == LDMemType)
18831 return true;
18832 if (LDMemType.isInteger() && LDType.isInteger()) {
18833 switch (LD->getExtensionType()) {
18834 case ISD::NON_EXTLOAD:
18835 Val = DAG.getBitcast(LDType, Val);
18836 return true;
18837 case ISD::EXTLOAD:
18838 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18839 return true;
18840 case ISD::SEXTLOAD:
18841 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18842 return true;
18843 case ISD::ZEXTLOAD:
18844 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18845 return true;
18846 }
18847 }
18848 return false;
18849}
18850
18851StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18852 int64_t &Offset) {
18853 SDValue Chain = LD->getOperand(0);
18854
18855 // Look through CALLSEQ_START.
18856 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18857 Chain = Chain->getOperand(0);
18858
18859 StoreSDNode *ST = nullptr;
18860  SmallVector<SDValue, 8> Aliases;
18861  if (Chain.getOpcode() == ISD::TokenFactor) {
18862 // Look for unique store within the TokenFactor.
18863 for (SDValue Op : Chain->ops()) {
18864 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18865 if (!Store)
18866 continue;
18867 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18868 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18869 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18870 continue;
18871 // Make sure the store is not aliased with any nodes in TokenFactor.
18872 GatherAllAliases(Store, Chain, Aliases);
18873 if (Aliases.empty() ||
18874 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18875 ST = Store;
18876 break;
18877 }
18878 } else {
18879 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18880 if (Store) {
18881 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18882 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18883 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18884 ST = Store;
18885 }
18886 }
18887
18888 return ST;
18889}
18890
18891SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18892 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18893 return SDValue();
18894 SDValue Chain = LD->getOperand(0);
18895 int64_t Offset;
18896
18897 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18898 // TODO: Relax this restriction for unordered atomics (see D66309)
18899 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18900 return SDValue();
18901
18902 EVT LDType = LD->getValueType(0);
18903 EVT LDMemType = LD->getMemoryVT();
18904 EVT STMemType = ST->getMemoryVT();
18905 EVT STType = ST->getValue().getValueType();
18906
18907 // There are two cases to consider here:
18908 // 1. The store is fixed width and the load is scalable. In this case we
18909 // don't know at compile time if the store completely envelops the load
18910 // so we abandon the optimisation.
18911 // 2. The store is scalable and the load is fixed width. We could
18912 // potentially support a limited number of cases here, but there has been
18913 // no cost-benefit analysis to prove it's worth it.
18914 bool LdStScalable = LDMemType.isScalableVT();
18915 if (LdStScalable != STMemType.isScalableVT())
18916 return SDValue();
18917
18918 // If we are dealing with scalable vectors on a big endian platform the
18919 // calculation of offsets below becomes trickier, since we do not know at
18920 // compile time the absolute size of the vector. Until we've done more
18921 // analysis on big-endian platforms it seems better to bail out for now.
18922 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18923 return SDValue();
18924
18925  // Normalize for endianness. After this, Offset=0 will denote that the least
18926  // significant bit in the loaded value maps to the least significant bit in
18927  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18928  // n-th least significant byte of the stored value.
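  // For example, if an i32 store feeds an i8 load at byte offset 3, Offset
  // stays 3 on little-endian targets, while on big-endian targets it becomes
  // (32 - 8) / 8 - 3 = 0, i.e. the load reads the least significant byte of
  // the stored value.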
18929 int64_t OrigOffset = Offset;
18930 if (DAG.getDataLayout().isBigEndian())
18931 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18932 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18933 8 -
18934 Offset;
18935
18936  // Check that the stored value covers all bits that are loaded.
18937 bool STCoversLD;
18938
18939 TypeSize LdMemSize = LDMemType.getSizeInBits();
18940 TypeSize StMemSize = STMemType.getSizeInBits();
18941 if (LdStScalable)
18942 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18943 else
18944 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18945 StMemSize.getFixedValue());
18946
18947 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18948 if (LD->isIndexed()) {
18949 // Cannot handle opaque target constants and we must respect the user's
18950 // request not to split indexes from loads.
18951 if (!canSplitIdx(LD))
18952 return SDValue();
18953 SDValue Idx = SplitIndexingFromLoad(LD);
18954 SDValue Ops[] = {Val, Idx, Chain};
18955 return CombineTo(LD, Ops, 3);
18956 }
18957 return CombineTo(LD, Val, Chain);
18958 };
18959
18960 if (!STCoversLD)
18961 return SDValue();
18962
18963 // Memory as copy space (potentially masked).
18964 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18965 // Simple case: Direct non-truncating forwarding
18966 if (LDType.getSizeInBits() == LdMemSize)
18967 return ReplaceLd(LD, ST->getValue(), Chain);
18968 // Can we model the truncate and extension with an and mask?
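    // E.g., an i32 value that was truncstored as i16 and then reloaded with a
    // zextload can be forwarded as (and StoredVal, 0xffff).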
18969 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18970 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18971 // Mask to size of LDMemType
18972 auto Mask =
18973          DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18974                                               StMemSize.getFixedValue()),
18975 SDLoc(ST), STType);
18976 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18977 return ReplaceLd(LD, Val, Chain);
18978 }
18979 }
18980
18981 // Handle some cases for big-endian that would be Offset 0 and handled for
18982 // little-endian.
18983 SDValue Val = ST->getValue();
18984 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18985 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18986 !LDType.isVector() && isTypeLegal(STType) &&
18987 TLI.isOperationLegal(ISD::SRL, STType)) {
18988 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18989 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18990 Offset = 0;
18991 }
18992 }
18993
18994 // TODO: Deal with nonzero offset.
18995 if (LD->getBasePtr().isUndef() || Offset != 0)
18996 return SDValue();
18997  // Model necessary truncations / extensions.
18998 // Truncate Value To Stored Memory Size.
18999 do {
19000 if (!getTruncatedStoreValue(ST, Val))
19001 continue;
19002 if (!isTypeLegal(LDMemType))
19003 continue;
19004 if (STMemType != LDMemType) {
19005 // TODO: Support vectors? This requires extract_subvector/bitcast.
19006 if (!STMemType.isVector() && !LDMemType.isVector() &&
19007 STMemType.isInteger() && LDMemType.isInteger())
19008 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19009 else
19010 continue;
19011 }
19012 if (!extendLoadedValueToExtension(LD, Val))
19013 continue;
19014 return ReplaceLd(LD, Val, Chain);
19015 } while (false);
19016
19017 // On failure, cleanup dead nodes we may have created.
19018 if (Val->use_empty())
19019 deleteAndRecombine(Val.getNode());
19020 return SDValue();
19021}
19022
19023SDValue DAGCombiner::visitLOAD(SDNode *N) {
19024 LoadSDNode *LD = cast<LoadSDNode>(N);
19025 SDValue Chain = LD->getChain();
19026 SDValue Ptr = LD->getBasePtr();
19027
19028 // If load is not volatile and there are no uses of the loaded value (and
19029 // the updated indexed value in case of indexed loads), change uses of the
19030 // chain value into uses of the chain input (i.e. delete the dead load).
19031 // TODO: Allow this for unordered atomics (see D66309)
19032 if (LD->isSimple()) {
19033 if (N->getValueType(1) == MVT::Other) {
19034 // Unindexed loads.
19035 if (!N->hasAnyUseOfValue(0)) {
19036 // It's not safe to use the two value CombineTo variant here. e.g.
19037 // v1, chain2 = load chain1, loc
19038 // v2, chain3 = load chain2, loc
19039 // v3 = add v2, c
19040 // Now we replace use of chain2 with chain1. This makes the second load
19041 // isomorphic to the one we are deleting, and thus makes this load live.
19042 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19043 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19044 dbgs() << "\n");
19045 WorklistRemover DeadNodes(*this);
19046 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19047 AddUsersToWorklist(Chain.getNode());
19048 if (N->use_empty())
19049 deleteAndRecombine(N);
19050
19051 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19052 }
19053 } else {
19054 // Indexed loads.
19055 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19056
19057 // If this load has an opaque TargetConstant offset, then we cannot split
19058 // the indexing into an add/sub directly (that TargetConstant may not be
19059 // valid for a different type of node, and we cannot convert an opaque
19060 // target constant into a regular constant).
19061 bool CanSplitIdx = canSplitIdx(LD);
19062
19063 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19064 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19065 SDValue Index;
19066 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19067 Index = SplitIndexingFromLoad(LD);
19068 // Try to fold the base pointer arithmetic into subsequent loads and
19069 // stores.
19070 AddUsersToWorklist(N);
19071 } else
19072 Index = DAG.getUNDEF(N->getValueType(1));
19073 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19074 dbgs() << "\nWith: "; Undef.dump(&DAG);
19075 dbgs() << " and 2 other values\n");
19076 WorklistRemover DeadNodes(*this);
19077 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19078        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19079        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19080 deleteAndRecombine(N);
19081 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19082 }
19083 }
19084 }
19085
19086 // If this load is directly stored, replace the load value with the stored
19087 // value.
19088 if (auto V = ForwardStoreValueToDirectLoad(LD))
19089 return V;
19090
19091 // Try to infer better alignment information than the load already has.
19092 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19093 !LD->isAtomic()) {
19094 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19095 if (*Alignment > LD->getAlign() &&
19096 isAligned(*Alignment, LD->getSrcValueOffset())) {
19097 SDValue NewLoad = DAG.getExtLoad(
19098 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19099 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19100 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19101 // NewLoad will always be N as we are only refining the alignment
19102 assert(NewLoad.getNode() == N);
19103 (void)NewLoad;
19104 }
19105 }
19106 }
19107
19108 if (LD->isUnindexed()) {
19109 // Walk up chain skipping non-aliasing memory nodes.
19110 SDValue BetterChain = FindBetterChain(LD, Chain);
19111
19112 // If there is a better chain.
19113 if (Chain != BetterChain) {
19114 SDValue ReplLoad;
19115
19116      // Replace the chain to avoid a dependency.
19117 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19118 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19119 BetterChain, Ptr, LD->getMemOperand());
19120 } else {
19121 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19122 LD->getValueType(0),
19123 BetterChain, Ptr, LD->getMemoryVT(),
19124 LD->getMemOperand());
19125 }
19126
19127 // Create token factor to keep old chain connected.
19128 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19129 MVT::Other, Chain, ReplLoad.getValue(1));
19130
19131 // Replace uses with load result and token factor
19132 return CombineTo(N, ReplLoad.getValue(0), Token);
19133 }
19134 }
19135
19136 // Try transforming N to an indexed load.
19137 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19138 return SDValue(N, 0);
19139
19140 // Try to slice up N to more direct loads if the slices are mapped to
19141 // different register banks or pairing can take place.
19142 if (SliceUpLoad(N))
19143 return SDValue(N, 0);
19144
19145 return SDValue();
19146}
19147
19148namespace {
19149
19150/// Helper structure used to slice a load in smaller loads.
19151/// Basically a slice is obtained from the following sequence:
19152/// Origin = load Ty1, Base
19153/// Shift = srl Ty1 Origin, CstTy Amount
19154/// Inst = trunc Shift to Ty2
19155///
19156/// Then, it will be rewritten into:
19157/// Slice = load SliceTy, Base + SliceOffset
19158/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19159///
19160/// SliceTy is deduced from the number of bits that are actually used to
19161/// build Inst.
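/// For example, on a little-endian target an i32 load whose only uses are
/// (trunc X to i16) and (trunc (srl X, 16) to i16) may be split into two i16
/// loads from Base and Base+2, provided the cost checks below find it
/// profitable.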
19162struct LoadedSlice {
19163 /// Helper structure used to compute the cost of a slice.
19164 struct Cost {
19165 /// Are we optimizing for code size.
19166 bool ForCodeSize = false;
19167
19168 /// Various cost.
19169 unsigned Loads = 0;
19170 unsigned Truncates = 0;
19171 unsigned CrossRegisterBanksCopies = 0;
19172 unsigned ZExts = 0;
19173 unsigned Shift = 0;
19174
19175 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19176
19177 /// Get the cost of one isolated slice.
19178 Cost(const LoadedSlice &LS, bool ForCodeSize)
19179 : ForCodeSize(ForCodeSize), Loads(1) {
19180 EVT TruncType = LS.Inst->getValueType(0);
19181 EVT LoadedType = LS.getLoadedType();
19182 if (TruncType != LoadedType &&
19183 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19184 ZExts = 1;
19185 }
19186
19187 /// Account for slicing gain in the current cost.
19188    /// Slicing provides a few gains like removing a shift or a
19189    /// truncate. This method allows growing the cost of the original
19190    /// load with the gain from this slice.
19191 void addSliceGain(const LoadedSlice &LS) {
19192 // Each slice saves a truncate.
19193 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19194 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19195 ++Truncates;
19196 // If there is a shift amount, this slice gets rid of it.
19197 if (LS.Shift)
19198 ++Shift;
19199 // If this slice can merge a cross register bank copy, account for it.
19200 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19201 ++CrossRegisterBanksCopies;
19202 }
19203
19204 Cost &operator+=(const Cost &RHS) {
19205 Loads += RHS.Loads;
19206 Truncates += RHS.Truncates;
19207 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19208 ZExts += RHS.ZExts;
19209 Shift += RHS.Shift;
19210 return *this;
19211 }
19212
19213 bool operator==(const Cost &RHS) const {
19214 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19215 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19216 ZExts == RHS.ZExts && Shift == RHS.Shift;
19217 }
19218
19219 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19220
19221 bool operator<(const Cost &RHS) const {
19222 // Assume cross register banks copies are as expensive as loads.
19223 // FIXME: Do we want some more target hooks?
19224 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19225 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19226 // Unless we are optimizing for code size, consider the
19227 // expensive operation first.
19228 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19229 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19230 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19231 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19232 }
19233
19234 bool operator>(const Cost &RHS) const { return RHS < *this; }
19235
19236 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19237
19238 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19239 };
19240
19241 // The last instruction that represent the slice. This should be a
19242 // truncate instruction.
19243 SDNode *Inst;
19244
19245 // The original load instruction.
19246 LoadSDNode *Origin;
19247
19248 // The right shift amount in bits from the original load.
19249 unsigned Shift;
19250
19251 // The DAG from which Origin came from.
19252 // This is used to get some contextual information about legal types, etc.
19253 SelectionDAG *DAG;
19254
19255 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19256 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19257 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19258
19259 /// Get the bits used in a chunk of bits \p BitWidth large.
19260  /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
19261  /// unused bits set to 0.
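  /// E.g., for an i32 origin, the slice (trunc (srl Origin, 16) to i16) has
  /// UsedBits = 0xffff0000.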
19262 APInt getUsedBits() const {
19263 // Reproduce the trunc(lshr) sequence:
19264 // - Start from the truncated value.
19265 // - Zero extend to the desired bit width.
19266 // - Shift left.
19267 assert(Origin && "No original load to compare against.");
19268 unsigned BitWidth = Origin->getValueSizeInBits(0);
19269 assert(Inst && "This slice is not bound to an instruction");
19270 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19271 "Extracted slice is bigger than the whole type!");
19272 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19273 UsedBits.setAllBits();
19274 UsedBits = UsedBits.zext(BitWidth);
19275 UsedBits <<= Shift;
19276 return UsedBits;
19277 }
19278
19279 /// Get the size of the slice to be loaded in bytes.
19280 unsigned getLoadedSize() const {
19281 unsigned SliceSize = getUsedBits().popcount();
19282 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19283 return SliceSize / 8;
19284 }
19285
19286 /// Get the type that will be loaded for this slice.
19287 /// Note: This may not be the final type for the slice.
19288 EVT getLoadedType() const {
19289 assert(DAG && "Missing context");
19290 LLVMContext &Ctxt = *DAG->getContext();
19291 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19292 }
19293
19294 /// Get the alignment of the load used for this slice.
19295 Align getAlign() const {
19296 Align Alignment = Origin->getAlign();
19297 uint64_t Offset = getOffsetFromBase();
19298 if (Offset != 0)
19299 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19300 return Alignment;
19301 }
19302
19303 /// Check if this slice can be rewritten with legal operations.
19304 bool isLegal() const {
19305 // An invalid slice is not legal.
19306 if (!Origin || !Inst || !DAG)
19307 return false;
19308
19309 // Offsets are for indexed load only, we do not handle that.
19310 if (!Origin->getOffset().isUndef())
19311 return false;
19312
19313 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19314
19315 // Check that the type is legal.
19316 EVT SliceType = getLoadedType();
19317 if (!TLI.isTypeLegal(SliceType))
19318 return false;
19319
19320 // Check that the load is legal for this type.
19321 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19322 return false;
19323
19324 // Check that the offset can be computed.
19325 // 1. Check its type.
19326 EVT PtrType = Origin->getBasePtr().getValueType();
19327 if (PtrType == MVT::Untyped || PtrType.isExtended())
19328 return false;
19329
19330 // 2. Check that it fits in the immediate.
19331 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19332 return false;
19333
19334 // 3. Check that the computation is legal.
19335 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19336 return false;
19337
19338 // Check that the zext is legal if it needs one.
19339 EVT TruncateType = Inst->getValueType(0);
19340 if (TruncateType != SliceType &&
19341 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19342 return false;
19343
19344 return true;
19345 }
19346
19347 /// Get the offset in bytes of this slice in the original chunk of
19348 /// bits.
19349 /// \pre DAG != nullptr.
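  /// E.g., for the i32 slice (trunc (srl Origin, 16) to i16), the offset is
  /// 2 on little-endian targets and 0 on big-endian targets.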
19350 uint64_t getOffsetFromBase() const {
19351 assert(DAG && "Missing context.");
19352 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19353 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19354 uint64_t Offset = Shift / 8;
19355 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19356 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19357 "The size of the original loaded type is not a multiple of a"
19358 " byte.");
19359 // If Offset is bigger than TySizeInBytes, it means we are loading all
19360 // zeros. This should have been optimized before in the process.
19361 assert(TySizeInBytes > Offset &&
19362 "Invalid shift amount for given loaded size");
19363 if (IsBigEndian)
19364 Offset = TySizeInBytes - Offset - getLoadedSize();
19365 return Offset;
19366 }
19367
19368 /// Generate the sequence of instructions to load the slice
19369 /// represented by this object and redirect the uses of this slice to
19370 /// this new sequence of instructions.
19371 /// \pre this->Inst && this->Origin are valid Instructions and this
19372 /// object passed the legal check: LoadedSlice::isLegal returned true.
19373 /// \return The last instruction of the sequence used to load the slice.
19374 SDValue loadSlice() const {
19375 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19376 const SDValue &OldBaseAddr = Origin->getBasePtr();
19377 SDValue BaseAddr = OldBaseAddr;
19378 // Get the offset in that chunk of bytes w.r.t. the endianness.
19379 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19380 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19381 if (Offset) {
19382 // BaseAddr = BaseAddr + Offset.
19383 EVT ArithType = BaseAddr.getValueType();
19384 SDLoc DL(Origin);
19385 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19386 DAG->getConstant(Offset, DL, ArithType));
19387 }
19388
19389 // Create the type of the loaded slice according to its size.
19390 EVT SliceType = getLoadedType();
19391
19392 // Create the load for the slice.
19393 SDValue LastInst =
19394 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19395                     Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19396                     Origin->getMemOperand()->getFlags());
19397 // If the final type is not the same as the loaded type, this means that
19398 // we have to pad with zero. Create a zero extend for that.
19399 EVT FinalType = Inst->getValueType(0);
19400 if (SliceType != FinalType)
19401 LastInst =
19402 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19403 return LastInst;
19404 }
19405
19406 /// Check if this slice can be merged with an expensive cross register
19407 /// bank copy. E.g.,
19408 /// i = load i32
19409 /// f = bitcast i32 i to float
19410 bool canMergeExpensiveCrossRegisterBankCopy() const {
19411 if (!Inst || !Inst->hasOneUse())
19412 return false;
19413 SDNode *Use = *Inst->use_begin();
19414 if (Use->getOpcode() != ISD::BITCAST)
19415 return false;
19416 assert(DAG && "Missing context");
19417 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19418 EVT ResVT = Use->getValueType(0);
19419 const TargetRegisterClass *ResRC =
19420 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19421 const TargetRegisterClass *ArgRC =
19422 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19423 Use->getOperand(0)->isDivergent());
19424 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19425 return false;
19426
19427 // At this point, we know that we perform a cross-register-bank copy.
19428 // Check if it is expensive.
19429    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19430    // Assume bitcasts are cheap, unless both register classes do not
19431 // explicitly share a common sub class.
19432 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19433 return false;
19434
19435 // Check if it will be merged with the load.
19436 // 1. Check the alignment / fast memory access constraint.
19437 unsigned IsFast = 0;
19438 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19439 Origin->getAddressSpace(), getAlign(),
19440 Origin->getMemOperand()->getFlags(), &IsFast) ||
19441 !IsFast)
19442 return false;
19443
19444 // 2. Check that the load is a legal operation for that type.
19445 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19446 return false;
19447
19448 // 3. Check that we do not have a zext in the way.
19449 if (Inst->getValueType(0) != getLoadedType())
19450 return false;
19451
19452 return true;
19453 }
19454};
19455
19456} // end anonymous namespace
19457
19458/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19459/// \p UsedBits looks like 0..0 1..1 0..0.
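/// For instance, 0x00ffff00 is dense while 0x00ff00ff is not.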
19460static bool areUsedBitsDense(const APInt &UsedBits) {
19461 // If all the bits are one, this is dense!
19462 if (UsedBits.isAllOnes())
19463 return true;
19464
19465 // Get rid of the unused bits on the right.
19466 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19467 // Get rid of the unused bits on the left.
19468 if (NarrowedUsedBits.countl_zero())
19469 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19470 // Check that the chunk of bits is completely used.
19471 return NarrowedUsedBits.isAllOnes();
19472}
19473
19474/// Check whether or not \p First and \p Second are next to each other
19475/// in memory. This means that there is no hole between the bits loaded
19476/// by \p First and the bits loaded by \p Second.
19477static bool areSlicesNextToEachOther(const LoadedSlice &First,
19478 const LoadedSlice &Second) {
19479 assert(First.Origin == Second.Origin && First.Origin &&
19480 "Unable to match different memory origins.");
19481 APInt UsedBits = First.getUsedBits();
19482 assert((UsedBits & Second.getUsedBits()) == 0 &&
19483 "Slices are not supposed to overlap.");
19484 UsedBits |= Second.getUsedBits();
19485 return areUsedBitsDense(UsedBits);
19486}
19487
19488/// Adjust the \p GlobalLSCost according to the target
19489/// pairing capabilities and the layout of the slices.
19490/// \pre \p GlobalLSCost should account for at least as many loads as
19491/// there are in the slices in \p LoadedSlices.
19492static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19493                                 LoadedSlice::Cost &GlobalLSCost) {
19494 unsigned NumberOfSlices = LoadedSlices.size();
19495  // If there are fewer than 2 elements, no pairing is possible.
19496 if (NumberOfSlices < 2)
19497 return;
19498
19499 // Sort the slices so that elements that are likely to be next to each
19500 // other in memory are next to each other in the list.
19501 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19502 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19503 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19504 });
19505 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19506  // First (resp. Second) is the first (resp. second) potential candidate
19507 // to be placed in a paired load.
19508 const LoadedSlice *First = nullptr;
19509 const LoadedSlice *Second = nullptr;
19510 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19511 // Set the beginning of the pair.
19512 First = Second) {
19513 Second = &LoadedSlices[CurrSlice];
19514
19515 // If First is NULL, it means we start a new pair.
19516 // Get to the next slice.
19517 if (!First)
19518 continue;
19519
19520 EVT LoadedType = First->getLoadedType();
19521
19522 // If the types of the slices are different, we cannot pair them.
19523 if (LoadedType != Second->getLoadedType())
19524 continue;
19525
19526 // Check if the target supplies paired loads for this type.
19527 Align RequiredAlignment;
19528 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19529 // move to the next pair, this type is hopeless.
19530 Second = nullptr;
19531 continue;
19532 }
19533 // Check if we meet the alignment requirement.
19534 if (First->getAlign() < RequiredAlignment)
19535 continue;
19536
19537 // Check that both loads are next to each other in memory.
19538 if (!areSlicesNextToEachOther(*First, *Second))
19539 continue;
19540
19541 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19542 --GlobalLSCost.Loads;
19543 // Move to the next pair.
19544 Second = nullptr;
19545 }
19546}
19547
19548/// Check the profitability of all involved LoadedSlice.
19549/// Currently, it is considered profitable if there are exactly two
19550/// involved slices (1) which are (2) next to each other in memory, and
19551/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19552///
19553/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19554/// the elements themselves.
19555///
19556/// FIXME: When the cost model will be mature enough, we can relax
19557/// constraints (1) and (2).
19558static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19559                                const APInt &UsedBits, bool ForCodeSize) {
19560  unsigned NumberOfSlices = LoadedSlices.size();
19561  if (StressLoadSlicing)
19562    return NumberOfSlices > 1;
19563
19564 // Check (1).
19565 if (NumberOfSlices != 2)
19566 return false;
19567
19568 // Check (2).
19569 if (!areUsedBitsDense(UsedBits))
19570 return false;
19571
19572 // Check (3).
19573 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19574 // The original code has one big load.
19575 OrigCost.Loads = 1;
19576 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19577 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19578 // Accumulate the cost of all the slices.
19579 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19580 GlobalSlicingCost += SliceCost;
19581
19582 // Account as cost in the original configuration the gain obtained
19583 // with the current slices.
19584 OrigCost.addSliceGain(LS);
19585 }
19586
19587 // If the target supports paired load, adjust the cost accordingly.
19588 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19589 return OrigCost > GlobalSlicingCost;
19590}
19591
19592/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19593/// operations, split it in the various pieces being extracted.
19594///
19595/// This sort of thing is introduced by SROA.
19596/// This slicing takes care not to insert overlapping loads.
19597/// \pre LI is a simple load (i.e., not an atomic or volatile load).
19598bool DAGCombiner::SliceUpLoad(SDNode *N) {
19599 if (Level < AfterLegalizeDAG)
19600 return false;
19601
19602 LoadSDNode *LD = cast<LoadSDNode>(N);
19603 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19604 !LD->getValueType(0).isInteger())
19605 return false;
19606
19607 // The algorithm to split up a load of a scalable vector into individual
19608 // elements currently requires knowing the length of the loaded type,
19609 // so will need adjusting to work on scalable vectors.
19610 if (LD->getValueType(0).isScalableVector())
19611 return false;
19612
19613 // Keep track of already used bits to detect overlapping values.
19614 // In that case, we will just abort the transformation.
19615 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19616
19617 SmallVector<LoadedSlice, 4> LoadedSlices;
19618
19619 // Check if this load is used as several smaller chunks of bits.
19620 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19621 // of computation for each trunc.
19622 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19623 UI != UIEnd; ++UI) {
19624 // Skip the uses of the chain.
19625 if (UI.getUse().getResNo() != 0)
19626 continue;
19627
19628 SDNode *User = *UI;
19629 unsigned Shift = 0;
19630
19631 // Check if this is a trunc(lshr).
19632 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19633 isa<ConstantSDNode>(User->getOperand(1))) {
19634 Shift = User->getConstantOperandVal(1);
19635 User = *User->use_begin();
19636 }
19637
19638    // At this point, User is a TRUNCATE iff we encountered trunc or
19639 // trunc(lshr).
19640 if (User->getOpcode() != ISD::TRUNCATE)
19641 return false;
19642
19643    // The width of the type must be a power of 2 and at least 8 bits.
19644    // Otherwise the load cannot be represented in LLVM IR.
19645    // Moreover, if the shift amount is not a multiple of 8 bits, the slice
19646    // would straddle byte boundaries. We do not support that.
19647 unsigned Width = User->getValueSizeInBits(0);
19648 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19649 return false;
19650
19651 // Build the slice for this chain of computations.
19652 LoadedSlice LS(User, LD, Shift, &DAG);
19653 APInt CurrentUsedBits = LS.getUsedBits();
19654
19655 // Check if this slice overlaps with another.
19656 if ((CurrentUsedBits & UsedBits) != 0)
19657 return false;
19658 // Update the bits used globally.
19659 UsedBits |= CurrentUsedBits;
19660
19661 // Check if the new slice would be legal.
19662 if (!LS.isLegal())
19663 return false;
19664
19665 // Record the slice.
19666 LoadedSlices.push_back(LS);
19667 }
19668
19669 // Abort slicing if it does not seem to be profitable.
19670 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19671 return false;
19672
19673 ++SlicedLoads;
19674
19675 // Rewrite each chain to use an independent load.
19676 // By construction, each chain can be represented by a unique load.
19677
19678 // Prepare the argument for the new token factor for all the slices.
19679 SmallVector<SDValue, 8> ArgChains;
19680 for (const LoadedSlice &LS : LoadedSlices) {
19681 SDValue SliceInst = LS.loadSlice();
19682 CombineTo(LS.Inst, SliceInst, true);
19683 if (SliceInst.getOpcode() != ISD::LOAD)
19684 SliceInst = SliceInst.getOperand(0);
19685 assert(SliceInst->getOpcode() == ISD::LOAD &&
19686 "It takes more than a zext to get to the loaded slice!!");
19687 ArgChains.push_back(SliceInst.getValue(1));
19688 }
19689
19690 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19691 ArgChains);
19692 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19693 AddToWorklist(Chain.getNode());
19694 return true;
19695}
19696
19697/// Check to see if V is (and (load ptr), imm), i.e. a load that has specific
19698/// bytes cleared out. If so, return the number of bytes being masked out and
19699/// the byte offset at which the masked region starts.
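/// For example (illustrative only): with V = (and (load p), 0xFFFF00FF) on an
/// i32 value, the inverted mask is a single run of eight set bits starting at
/// bit 8, so one byte is being cleared at byte offset 1 and the result would
/// be {1, 1} (assuming the chain check below also passes).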
19700static std::pair<unsigned, unsigned>
19701CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19702  std::pair<unsigned, unsigned> Result(0, 0);
19703
19704 // Check for the structure we're looking for.
19705 if (V->getOpcode() != ISD::AND ||
19706 !isa<ConstantSDNode>(V->getOperand(1)) ||
19707 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19708 return Result;
19709
19710 // Check the chain and pointer.
19711 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19712 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19713
19714 // This only handles simple types.
19715 if (V.getValueType() != MVT::i16 &&
19716 V.getValueType() != MVT::i32 &&
19717 V.getValueType() != MVT::i64)
19718 return Result;
19719
19720 // Check the constant mask. Invert it so that the bits being masked out are
19721  // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
19722 // follow the sign bit for uniformity.
19723 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19724 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19725 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19726 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19727 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19728 if (NotMaskLZ == 64) return Result; // All zero mask.
19729
19730 // See if we have a continuous run of bits. If so, we have 0*1+0*
19731 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19732 return Result;
19733
19734 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19735 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19736 NotMaskLZ -= 64-V.getValueSizeInBits();
19737
19738 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19739 switch (MaskedBytes) {
19740 case 1:
19741 case 2:
19742 case 4: break;
19743 default: return Result; // All one mask, or 5-byte mask.
19744 }
19745
19746  // Verify that the masked region starts at a byte offset that is a multiple
19747  // of its size, so the narrowed access is aligned the same as its width.
19748 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19749
19750  // For narrowing to be valid, it must be the case that the load is the
19751  // memory operation immediately preceding the store.
19752 if (LD == Chain.getNode())
19753 ; // ok.
19754 else if (Chain->getOpcode() == ISD::TokenFactor &&
19755 SDValue(LD, 1).hasOneUse()) {
19756    // LD has only 1 chain use so there are no indirect dependencies.
19757 if (!LD->isOperandOf(Chain.getNode()))
19758 return Result;
19759 } else
19760 return Result; // Fail.
19761
19762 Result.first = MaskedBytes;
19763 Result.second = NotMaskTZ/8;
19764 return Result;
19765}
19766
19767/// Check to see if IVal is something that provides a value as specified by
19768/// MaskInfo. If so, replace the specified store with a narrower store of
19769/// truncated IVal.
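/// For example (a sketch): with MaskInfo = {1, 1} and IVal known to be zero
/// outside bits 8..15, the wide store can be replaced by an i8 store of
/// (trunc (srl IVal, 8)) at byte offset 1 from the original pointer on a
/// little-endian target.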
19770static SDValue
19771ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19772 SDValue IVal, StoreSDNode *St,
19773 DAGCombiner *DC) {
19774 unsigned NumBytes = MaskInfo.first;
19775 unsigned ByteShift = MaskInfo.second;
19776 SelectionDAG &DAG = DC->getDAG();
19777
19778 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19779 // that uses this. If not, this is not a replacement.
19780 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19781 ByteShift*8, (ByteShift+NumBytes)*8);
19782 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19783
19784 // Check that it is legal on the target to do this. It is legal if the new
19785 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19786 // legalization. If the source type is legal, but the store type isn't, see
19787 // if we can use a truncating store.
19788 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19789 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19790 bool UseTruncStore;
19791 if (DC->isTypeLegal(VT))
19792 UseTruncStore = false;
19793 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19794 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19795 UseTruncStore = true;
19796 else
19797 return SDValue();
19798
19799 // Can't do this for indexed stores.
19800 if (St->isIndexed())
19801 return SDValue();
19802
19803 // Check that the target doesn't think this is a bad idea.
19804 if (St->getMemOperand() &&
19805 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19806 *St->getMemOperand()))
19807 return SDValue();
19808
19809 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19810 // shifted by ByteShift and truncated down to NumBytes.
19811 if (ByteShift) {
19812 SDLoc DL(IVal);
19813 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19814 DAG.getConstant(ByteShift*8, DL,
19815 DC->getShiftAmountTy(IVal.getValueType())));
19816 }
19817
19818 // Figure out the offset for the store and the alignment of the access.
19819 unsigned StOffset;
19820 if (DAG.getDataLayout().isLittleEndian())
19821 StOffset = ByteShift;
19822 else
19823 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19824
19825 SDValue Ptr = St->getBasePtr();
19826 if (StOffset) {
19827 SDLoc DL(IVal);
19829 }
19830
19831 ++OpsNarrowed;
19832 if (UseTruncStore)
19833 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19834 St->getPointerInfo().getWithOffset(StOffset),
19835 VT, St->getOriginalAlign());
19836
19837 // Truncate down to the new size.
19838 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19839
19840 return DAG
19841 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19842 St->getPointerInfo().getWithOffset(StOffset),
19843 St->getOriginalAlign());
19844}
19845
19846/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19847/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19848/// narrowing the load and store if it would end up being a win for performance
19849/// or code size.
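/// For example (illustrative, assuming the target finds the narrow type legal
/// and profitable): "store (or (load p), 0x00FF0000), p" on an i32 value can
/// be narrowed to an i8 load / or / store at byte offset 2 (little endian)
/// instead of touching all four bytes.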
19850SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19851 StoreSDNode *ST = cast<StoreSDNode>(N);
19852 if (!ST->isSimple())
19853 return SDValue();
19854
19855 SDValue Chain = ST->getChain();
19856 SDValue Value = ST->getValue();
19857 SDValue Ptr = ST->getBasePtr();
19858 EVT VT = Value.getValueType();
19859
19860 if (ST->isTruncatingStore() || VT.isVector())
19861 return SDValue();
19862
19863 unsigned Opc = Value.getOpcode();
19864
19865 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19866 !Value.hasOneUse())
19867 return SDValue();
19868
19869 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19870 // is a byte mask indicating a consecutive number of bytes, check to see if
19871 // Y is known to provide just those bytes. If so, we try to replace the
19872 // load + replace + store sequence with a single (narrower) store, which makes
19873 // the load dead.
19875 std::pair<unsigned, unsigned> MaskedLoad;
19876 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19877 if (MaskedLoad.first)
19878 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19879                                                  Value.getOperand(1), ST, this))
19880 return NewST;
19881
19882 // Or is commutative, so try swapping X and Y.
19883 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19884 if (MaskedLoad.first)
19885 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19886                                                  Value.getOperand(0), ST, this))
19887 return NewST;
19888 }
19889
19891 return SDValue();
19892
19893 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19894 return SDValue();
19895
19896 SDValue N0 = Value.getOperand(0);
19897 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19898 Chain == SDValue(N0.getNode(), 1)) {
19899 LoadSDNode *LD = cast<LoadSDNode>(N0);
19900 if (LD->getBasePtr() != Ptr ||
19901 LD->getPointerInfo().getAddrSpace() !=
19902 ST->getPointerInfo().getAddrSpace())
19903 return SDValue();
19904
19905    // Find the type to which to narrow the load / op / store.
19906 SDValue N1 = Value.getOperand(1);
19907 unsigned BitWidth = N1.getValueSizeInBits();
19908 APInt Imm = N1->getAsAPIntVal();
19909 if (Opc == ISD::AND)
19910      Imm ^= APInt::getAllOnes(BitWidth);
19911    if (Imm == 0 || Imm.isAllOnes())
19912 return SDValue();
19913 unsigned ShAmt = Imm.countr_zero();
19914 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19915 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19916 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19917 // The narrowing should be profitable, the load/store operation should be
19918 // legal (or custom) and the store size should be equal to the NewVT width.
19919 while (NewBW < BitWidth &&
19920 (NewVT.getStoreSizeInBits() != NewBW ||
19921 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19922 !TLI.isNarrowingProfitable(VT, NewVT))) {
19923 NewBW = NextPowerOf2(NewBW);
19924 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19925 }
19926 if (NewBW >= BitWidth)
19927 return SDValue();
19928
19929    // If the lowest changed bit does not start at a NewBW-bit boundary,
19930    // round the shift down to the previous boundary.
19931 if (ShAmt % NewBW)
19932 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19933    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19934                                   std::min(BitWidth, ShAmt + NewBW));
19935 if ((Imm & Mask) == Imm) {
19936 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19937 if (Opc == ISD::AND)
19938 NewImm ^= APInt::getAllOnes(NewBW);
19939 uint64_t PtrOff = ShAmt / 8;
19940 // For big endian targets, we need to adjust the offset to the pointer to
19941 // load the correct bytes.
19942 if (DAG.getDataLayout().isBigEndian())
19943 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19944
19945 unsigned IsFast = 0;
19946 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19947 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19948 LD->getAddressSpace(), NewAlign,
19949 LD->getMemOperand()->getFlags(), &IsFast) ||
19950 !IsFast)
19951 return SDValue();
19952
19953 SDValue NewPtr =
19955 SDValue NewLD =
19956 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19957 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19958 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19959 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19960 DAG.getConstant(NewImm, SDLoc(Value),
19961 NewVT));
19962 SDValue NewST =
19963 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19964 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19965
19966 AddToWorklist(NewPtr.getNode());
19967 AddToWorklist(NewLD.getNode());
19968 AddToWorklist(NewVal.getNode());
19969 WorklistRemover DeadNodes(*this);
19970 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19971 ++OpsNarrowed;
19972 return NewST;
19973 }
19974 }
19975
19976 return SDValue();
19977}
19978
19979/// For a given floating point load / store pair, if the load value isn't used
19980/// by any other operations, then consider transforming the pair to integer
19981/// load / store operations if the target deems the transformation profitable.
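/// For example (illustrative): an f32 value that is loaded and then only
/// stored again can be moved through integer registers as an i32 load / store,
/// avoiding a floating-point register round-trip when the target reports the
/// integer accesses as legal and fast.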
19982SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19983 StoreSDNode *ST = cast<StoreSDNode>(N);
19984 SDValue Value = ST->getValue();
19985 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19986 Value.hasOneUse()) {
19987 LoadSDNode *LD = cast<LoadSDNode>(Value);
19988 EVT VT = LD->getMemoryVT();
19989 if (!VT.isFloatingPoint() ||
19990 VT != ST->getMemoryVT() ||
19991 LD->isNonTemporal() ||
19992 ST->isNonTemporal() ||
19993 LD->getPointerInfo().getAddrSpace() != 0 ||
19994 ST->getPointerInfo().getAddrSpace() != 0)
19995 return SDValue();
19996
19997 TypeSize VTSize = VT.getSizeInBits();
19998
19999 // We don't know the size of scalable types at compile time so we cannot
20000 // create an integer of the equivalent size.
20001 if (VTSize.isScalable())
20002 return SDValue();
20003
20004 unsigned FastLD = 0, FastST = 0;
20005 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20006 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20007 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20008        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20009        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20010        !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20011 *LD->getMemOperand(), &FastLD) ||
20012 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20013 *ST->getMemOperand(), &FastST) ||
20014 !FastLD || !FastST)
20015 return SDValue();
20016
20017 SDValue NewLD =
20018 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
20019 LD->getPointerInfo(), LD->getAlign());
20020
20021 SDValue NewST =
20022 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
20023 ST->getPointerInfo(), ST->getAlign());
20024
20025 AddToWorklist(NewLD.getNode());
20026 AddToWorklist(NewST.getNode());
20027 WorklistRemover DeadNodes(*this);
20028 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20029 ++LdStFP2Int;
20030 return NewST;
20031 }
20032
20033 return SDValue();
20034}
20035
20036// This is a helper function for visitMUL to check the profitability
20037// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20038// MulNode is the original multiply, AddNode is (add x, c1),
20039// and ConstNode is c2.
20040//
20041// If the (add x, c1) has multiple uses, we could increase
20042// the number of adds if we make this transformation.
20043// It would only be worth doing this if we can remove a
20044// multiply in the process. Check for that here.
20045// To illustrate:
20046// (A + c1) * c3
20047// (A + c2) * c3
20048// We're checking for cases where we have common "c3 * A" expressions.
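// Concretely (illustrative): rewriting (A + 10) * 5 and (A + 20) * 5 as
// (A * 5) + 50 and (A * 5) + 100 exposes a common A * 5 that can be CSE'd,
// so the extra adds are paid for by the removed multiply.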
20049bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20050 SDValue ConstNode) {
20051 APInt Val;
20052
20053 // If the add only has one use, and the target thinks the folding is
20054 // profitable or does not lead to worse code, this would be OK to do.
20055 if (AddNode->hasOneUse() &&
20056 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20057 return true;
20058
20059 // Walk all the users of the constant with which we're multiplying.
20060 for (SDNode *Use : ConstNode->uses()) {
20061 if (Use == MulNode) // This use is the one we're on right now. Skip it.
20062 continue;
20063
20064 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
20065 SDNode *OtherOp;
20066 SDNode *MulVar = AddNode.getOperand(0).getNode();
20067
20068 // OtherOp is what we're multiplying against the constant.
20069 if (Use->getOperand(0) == ConstNode)
20070 OtherOp = Use->getOperand(1).getNode();
20071 else
20072 OtherOp = Use->getOperand(0).getNode();
20073
20074 // Check to see if multiply is with the same operand of our "add".
20075 //
20076 // ConstNode = CONST
20077 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
20078 // ...
20079 // AddNode = (A + c1) <-- MulVar is A.
20080 // = AddNode * ConstNode <-- current visiting instruction.
20081 //
20082 // If we make this transformation, we will have a common
20083 // multiply (ConstNode * A) that we can save.
20084 if (OtherOp == MulVar)
20085 return true;
20086
20087 // Now check to see if a future expansion will give us a common
20088 // multiply.
20089 //
20090 // ConstNode = CONST
20091 // AddNode = (A + c1)
20092 // ... = AddNode * ConstNode <-- current visiting instruction.
20093 // ...
20094 // OtherOp = (A + c2)
20095 // Use = OtherOp * ConstNode <-- visiting Use.
20096 //
20097 // If we make this transformation, we will have a common
20098 // multiply (CONST * A) after we also do the same transformation
20099      // to the "Use" instruction.
20100 if (OtherOp->getOpcode() == ISD::ADD &&
20101          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20102          OtherOp->getOperand(0).getNode() == MulVar)
20103 return true;
20104 }
20105 }
20106
20107 // Didn't find a case where this would be profitable.
20108 return false;
20109}
20110
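// Build a TokenFactor over the incoming chains of the first NumStores
// candidates, skipping chains that are themselves candidate stores or
// duplicates, so the merged store depends on each predecessor exactly once.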
20111SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20112 unsigned NumStores) {
20113  SmallVector<SDValue, 8> Chains;
20114  SmallPtrSet<const SDNode *, 8> Visited;
20115  SDLoc StoreDL(StoreNodes[0].MemNode);
20116
20117 for (unsigned i = 0; i < NumStores; ++i) {
20118 Visited.insert(StoreNodes[i].MemNode);
20119 }
20120
20121  // Don't include chains that are themselves candidate stores or repeated nodes.
20122 for (unsigned i = 0; i < NumStores; ++i) {
20123 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20124 Chains.push_back(StoreNodes[i].MemNode->getChain());
20125 }
20126
20127 assert(!Chains.empty() && "Chain should have generated a chain");
20128 return DAG.getTokenFactor(StoreDL, Chains);
20129}
20130
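// Return true if every store in StoreNodes is known to access the same
// underlying IR object; only then can the first store's pointer info safely
// be reused for the merged access.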
20131bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20132 const Value *UnderlyingObj = nullptr;
20133 for (const auto &MemOp : StoreNodes) {
20134 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20135    // A pseudo value such as a stack frame entry has its own frame index and
20136    // size; do not reuse the first store's frame index for other frames.
20137 if (MMO->getPseudoValue())
20138 return false;
20139
20140 if (!MMO->getValue())
20141 return false;
20142
20143 const Value *Obj = getUnderlyingObject(MMO->getValue());
20144
20145 if (UnderlyingObj && UnderlyingObj != Obj)
20146 return false;
20147
20148 if (!UnderlyingObj)
20149 UnderlyingObj = Obj;
20150 }
20151
20152 return true;
20153}
20154
20155bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20156 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20157 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20158 // Make sure we have something to merge.
20159 if (NumStores < 2)
20160 return false;
20161
20162 assert((!UseTrunc || !UseVector) &&
20163 "This optimization cannot emit a vector truncating store");
20164
20165 // The latest Node in the DAG.
20166 SDLoc DL(StoreNodes[0].MemNode);
20167
20168 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20169 unsigned SizeInBits = NumStores * ElementSizeBits;
20170 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20171
20172 std::optional<MachineMemOperand::Flags> Flags;
20173 AAMDNodes AAInfo;
20174 for (unsigned I = 0; I != NumStores; ++I) {
20175 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20176 if (!Flags) {
20177 Flags = St->getMemOperand()->getFlags();
20178 AAInfo = St->getAAInfo();
20179 continue;
20180 }
20181 // Skip merging if there's an inconsistent flag.
20182 if (Flags != St->getMemOperand()->getFlags())
20183 return false;
20184 // Concatenate AA metadata.
20185 AAInfo = AAInfo.concat(St->getAAInfo());
20186 }
20187
20188 EVT StoreTy;
20189 if (UseVector) {
20190 unsigned Elts = NumStores * NumMemElts;
20191 // Get the type for the merged vector store.
20192 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20193 } else
20194 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20195
20196 SDValue StoredVal;
20197 if (UseVector) {
20198 if (IsConstantSrc) {
20199 SmallVector<SDValue, 8> BuildVector;
20200 for (unsigned I = 0; I != NumStores; ++I) {
20201 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20202 SDValue Val = St->getValue();
20203 // If constant is of the wrong type, convert it now. This comes up
20204 // when one of our stores was truncating.
20205 if (MemVT != Val.getValueType()) {
20206 Val = peekThroughBitcasts(Val);
20207 // Deal with constants of wrong size.
20208 if (ElementSizeBits != Val.getValueSizeInBits()) {
20209 auto *C = dyn_cast<ConstantSDNode>(Val);
20210 if (!C)
20211 // Not clear how to truncate FP values.
20212 // TODO: Handle truncation of build_vector constants
20213 return false;
20214
20215 EVT IntMemVT =
20216              EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20217          Val = DAG.getConstant(C->getAPIntValue()
20218 .zextOrTrunc(Val.getValueSizeInBits())
20219 .zextOrTrunc(ElementSizeBits),
20220 SDLoc(C), IntMemVT);
20221 }
20222        // Make sure the correctly sized value is bitcast to the correct type.
20223 Val = DAG.getBitcast(MemVT, Val);
20224 }
20225 BuildVector.push_back(Val);
20226 }
20227 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20228                                                  : ISD::BUILD_VECTOR,
20229                            DL, StoreTy, BuildVector);
20230 } else {
20231      SmallVector<SDValue, 8> Ops;
20232      for (unsigned i = 0; i < NumStores; ++i) {
20233 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20235 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20236 // type MemVT. If the underlying value is not the correct
20237 // type, but it is an extraction of an appropriate vector we
20238 // can recast Val to be of the correct type. This may require
20239 // converting between EXTRACT_VECTOR_ELT and
20240 // EXTRACT_SUBVECTOR.
20241 if ((MemVT != Val.getValueType()) &&
20242            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20243             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20244          EVT MemVTScalarTy = MemVT.getScalarType();
20245 // We may need to add a bitcast here to get types to line up.
20246 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20247 Val = DAG.getBitcast(MemVT, Val);
20248 } else if (MemVT.isVector() &&
20249                     Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20250            Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20251 } else {
20252 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20253                                            : ISD::EXTRACT_VECTOR_ELT;
20254            SDValue Vec = Val.getOperand(0);
20255 SDValue Idx = Val.getOperand(1);
20256 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20257 }
20258 }
20259 Ops.push_back(Val);
20260 }
20261
20262 // Build the extracted vector elements back into a vector.
20263 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20264                                                : ISD::BUILD_VECTOR,
20265                              DL, StoreTy, Ops);
20266 }
20267 } else {
20268 // We should always use a vector store when merging extracted vector
20269 // elements, so this path implies a store of constants.
20270 assert(IsConstantSrc && "Merged vector elements should use vector store");
20271
20272 APInt StoreInt(SizeInBits, 0);
20273
20274 // Construct a single integer constant which is made of the smaller
20275 // constant inputs.
20276 bool IsLE = DAG.getDataLayout().isLittleEndian();
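    // The integer is assembled most-significant chunk first (each iteration
    // shifts left before OR-ing), so on little-endian targets we visit the
    // stores from last to first: the lowest-addressed store supplies the
    // least significant bits.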
20277 for (unsigned i = 0; i < NumStores; ++i) {
20278 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20279 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20280
20281 SDValue Val = St->getValue();
20282 Val = peekThroughBitcasts(Val);
20283 StoreInt <<= ElementSizeBits;
20284 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20285 StoreInt |= C->getAPIntValue()
20286 .zextOrTrunc(ElementSizeBits)
20287 .zextOrTrunc(SizeInBits);
20288 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20289 StoreInt |= C->getValueAPF()
20290 .bitcastToAPInt()
20291 .zextOrTrunc(ElementSizeBits)
20292 .zextOrTrunc(SizeInBits);
20293 // If fp truncation is necessary give up for now.
20294 if (MemVT.getSizeInBits() != ElementSizeBits)
20295 return false;
20296 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20297                 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20298        // Not yet handled
20299 return false;
20300 } else {
20301 llvm_unreachable("Invalid constant element type");
20302 }
20303 }
20304
20305 // Create the new Load and Store operations.
20306 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20307 }
20308
20309 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20310 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20311 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20312
20313  // Make sure we use a truncating store if that is necessary for legality.
20314  // When generating the new widened store, if the first store's pointer info
20315  // cannot be reused, discard it except for the address space, because the
20316  // widened store can no longer be represented by the original pointer info,
20317  // which describes the narrower memory object.
20318 SDValue NewStore;
20319 if (!UseTrunc) {
20320 NewStore = DAG.getStore(
20321 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20322 CanReusePtrInfo
20323 ? FirstInChain->getPointerInfo()
20324 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20325 FirstInChain->getAlign(), *Flags, AAInfo);
20326 } else { // Must be realized as a trunc store
20327 EVT LegalizedStoredValTy =
20328 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20329 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20330 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20331 SDValue ExtendedStoreVal =
20332 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20333 LegalizedStoredValTy);
20334 NewStore = DAG.getTruncStore(
20335 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20336 CanReusePtrInfo
20337 ? FirstInChain->getPointerInfo()
20338 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20339 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20340 AAInfo);
20341 }
20342
20343 // Replace all merged stores with the new store.
20344 for (unsigned i = 0; i < NumStores; ++i)
20345 CombineTo(StoreNodes[i].MemNode, NewStore);
20346
20347 AddToWorklist(NewChain.getNode());
20348 return true;
20349}
20350
20351void DAGCombiner::getStoreMergeCandidates(
20352 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20353 SDNode *&RootNode) {
20354 // This holds the base pointer, index, and the offset in bytes from the base
20355 // pointer. We must have a base and an offset. Do not handle stores to undef
20356 // base pointers.
20357  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20358  if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20359 return;
20360
20361  SDValue Val = peekThroughBitcasts(St->getValue());
20362  StoreSource StoreSrc = getStoreSource(Val);
20363 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20364
20365 // Match on loadbaseptr if relevant.
20366 EVT MemVT = St->getMemoryVT();
20367 BaseIndexOffset LBasePtr;
20368 EVT LoadVT;
20369 if (StoreSrc == StoreSource::Load) {
20370 auto *Ld = cast<LoadSDNode>(Val);
20371 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20372 LoadVT = Ld->getMemoryVT();
20373 // Load and store should be the same type.
20374 if (MemVT != LoadVT)
20375 return;
20376 // Loads must only have one use.
20377 if (!Ld->hasNUsesOfValue(1, 0))
20378 return;
20379 // The memory operands must not be volatile/indexed/atomic.
20380 // TODO: May be able to relax for unordered atomics (see D66309)
20381 if (!Ld->isSimple() || Ld->isIndexed())
20382 return;
20383 }
20384 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20385 int64_t &Offset) -> bool {
20386 // The memory operands must not be volatile/indexed/atomic.
20387 // TODO: May be able to relax for unordered atomics (see D66309)
20388 if (!Other->isSimple() || Other->isIndexed())
20389 return false;
20390 // Don't mix temporal stores with non-temporal stores.
20391 if (St->isNonTemporal() != Other->isNonTemporal())
20392 return false;
20394 return false;
20395 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20396 // Allow merging constants of different types as integers.
20397 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20398 : Other->getMemoryVT() != MemVT;
20399 switch (StoreSrc) {
20400 case StoreSource::Load: {
20401 if (NoTypeMatch)
20402 return false;
20403 // The Load's Base Ptr must also match.
20404 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20405 if (!OtherLd)
20406 return false;
20407 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20408 if (LoadVT != OtherLd->getMemoryVT())
20409 return false;
20410 // Loads must only have one use.
20411 if (!OtherLd->hasNUsesOfValue(1, 0))
20412 return false;
20413 // The memory operands must not be volatile/indexed/atomic.
20414 // TODO: May be able to relax for unordered atomics (see D66309)
20415 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20416 return false;
20417 // Don't mix temporal loads with non-temporal loads.
20418 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20419 return false;
20420 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20421 *OtherLd))
20422 return false;
20423 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20424 return false;
20425 break;
20426 }
20427 case StoreSource::Constant:
20428 if (NoTypeMatch)
20429 return false;
20430 if (getStoreSource(OtherBC) != StoreSource::Constant)
20431 return false;
20432 break;
20433 case StoreSource::Extract:
20434 // Do not merge truncated stores here.
20435 if (Other->isTruncatingStore())
20436 return false;
20437 if (!MemVT.bitsEq(OtherBC.getValueType()))
20438 return false;
20439 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20440 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20441 return false;
20442 break;
20443 default:
20444 llvm_unreachable("Unhandled store source for merging");
20445 }
20446    Ptr = BaseIndexOffset::match(Other, DAG);
20447    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20448 };
20449
20450  // Check whether this (StoreNode, RootNode) pair has already bailed out of
20451  // the dependence check more times than the limit allows.
20452 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20453 SDNode *RootNode) -> bool {
20454 auto RootCount = StoreRootCountMap.find(StoreNode);
20455 return RootCount != StoreRootCountMap.end() &&
20456 RootCount->second.first == RootNode &&
20457 RootCount->second.second > StoreMergeDependenceLimit;
20458 };
20459
20460 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20461 // This must be a chain use.
20462 if (UseIter.getOperandNo() != 0)
20463 return;
20464 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20465      BaseIndexOffset Ptr;
20466      int64_t PtrDiff;
20467 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20468 !OverLimitInDependenceCheck(OtherStore, RootNode))
20469 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20470 }
20471 };
20472
20473  // We are looking for a root node which is an ancestor to all mergeable
20474  // stores. We search up through a load, to our root and then down
20475  // through all children. For instance we will find Store{1,2,3} if
20476  // St is Store1, Store2, or Store3 where the root is not a load,
20477  // which is always true for nonvolatile ops. TODO: Expand
20478 // the search to find all valid candidates through multiple layers of loads.
20479 //
20480 // Root
20481 // |-------|-------|
20482 // Load Load Store3
20483 // | |
20484 // Store1 Store2
20485 //
20486 // FIXME: We should be able to climb and
20487 // descend TokenFactors to find candidates as well.
20488
20489 RootNode = St->getChain().getNode();
20490
20491 unsigned NumNodesExplored = 0;
20492 const unsigned MaxSearchNodes = 1024;
20493 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20494 RootNode = Ldn->getChain().getNode();
20495 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20496 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20497 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20498 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20499 TryToAddCandidate(I2);
20500 }
20501 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20502 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20503 TryToAddCandidate(I);
20504 }
20505 }
20506 } else {
20507 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20508 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20509 TryToAddCandidate(I);
20510 }
20511}
20512
20513// We need to check that merging these stores does not cause a loop in the
20514// DAG. Any store candidate may depend on another candidate indirectly through
20515// its operands. Check in parallel by searching up from operands of candidates.
20516bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20517 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20518 SDNode *RootNode) {
20519 // FIXME: We should be able to truncate a full search of
20520  // predecessors by doing a BFS and keeping tabs on the originating
20521  // stores that worklist nodes come from, in a similar way to
20522  // TokenFactor simplification.
20523
20524  SmallPtrSet<const SDNode *, 32> Visited;
20525  SmallVector<const SDNode *, 8> Worklist;
20526 
20527 // RootNode is a predecessor to all candidates so we need not search
20528 // past it. Add RootNode (peeking through TokenFactors). Do not count
20529 // these towards size check.
20530
20531 Worklist.push_back(RootNode);
20532 while (!Worklist.empty()) {
20533 auto N = Worklist.pop_back_val();
20534 if (!Visited.insert(N).second)
20535 continue; // Already present in Visited.
20536 if (N->getOpcode() == ISD::TokenFactor) {
20537 for (SDValue Op : N->ops())
20538 Worklist.push_back(Op.getNode());
20539 }
20540 }
20541
20542 // Don't count pruning nodes towards max.
20543 unsigned int Max = 1024 + Visited.size();
20544 // Search Ops of store candidates.
20545 for (unsigned i = 0; i < NumStores; ++i) {
20546 SDNode *N = StoreNodes[i].MemNode;
20547 // Of the 4 Store Operands:
20548 // * Chain (Op 0) -> We have already considered these
20549 // in candidate selection, but only by following the
20550 // chain dependencies. We could still have a chain
20551 // dependency to a load, that has a non-chain dep to
20552 // another load, that depends on a store, etc. So it is
20553 // possible to have dependencies that consist of a mix
20554 // of chain and non-chain deps, and we need to include
20555    //               chain operands in the analysis here.
20556 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20557 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20558    //                         but aren't necessarily from the same base node, so
20559 // cycles possible (e.g. via indexed store).
20560 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20561 // non-indexed stores). Not constant on all targets (e.g. ARM)
20562 // and so can participate in a cycle.
20563 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20564 Worklist.push_back(N->getOperand(j).getNode());
20565 }
20566 // Search through DAG. We can stop early if we find a store node.
20567 for (unsigned i = 0; i < NumStores; ++i)
20568 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20569 Max)) {
20570      // If the search bails out, record the StoreNode and RootNode in the
20571 // StoreRootCountMap. If we have seen the pair many times over a limit,
20572      // we won't add the StoreNode into the StoreNodes set again.
20573 if (Visited.size() >= Max) {
20574 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20575 if (RootCount.first == RootNode)
20576 RootCount.second++;
20577 else
20578 RootCount = {RootNode, 1};
20579 }
20580 return false;
20581 }
20582 return true;
20583}
20584
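// Return the length of the run of consecutive store candidates at the front
// of StoreNodes, after trimming leading candidates that do not line up, or 0
// if no run of at least two adjacent stores exists.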
20585unsigned
20586DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20587 int64_t ElementSizeBytes) const {
20588 while (true) {
20589 // Find a store past the width of the first store.
20590 size_t StartIdx = 0;
20591 while ((StartIdx + 1 < StoreNodes.size()) &&
20592 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20593 StoreNodes[StartIdx + 1].OffsetFromBase)
20594 ++StartIdx;
20595
20596 // Bail if we don't have enough candidates to merge.
20597 if (StartIdx + 1 >= StoreNodes.size())
20598 return 0;
20599
20600 // Trim stores that overlapped with the first store.
20601 if (StartIdx)
20602 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20603
20604 // Scan the memory operations on the chain and find the first
20605 // non-consecutive store memory address.
20606 unsigned NumConsecutiveStores = 1;
20607 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20608 // Check that the addresses are consecutive starting from the second
20609 // element in the list of stores.
20610 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20611 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20612 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20613 break;
20614 NumConsecutiveStores = i + 1;
20615 }
20616 if (NumConsecutiveStores > 1)
20617 return NumConsecutiveStores;
20618
20619 // There are no consecutive stores at the start of the list.
20620 // Remove the first store and try again.
20621 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20622 }
20623}
20624
20625bool DAGCombiner::tryStoreMergeOfConstants(
20626 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20627 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20628 LLVMContext &Context = *DAG.getContext();
20629 const DataLayout &DL = DAG.getDataLayout();
20630 int64_t ElementSizeBytes = MemVT.getStoreSize();
20631 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20632 bool MadeChange = false;
20633
20634 // Store the constants into memory as one consecutive store.
20635 while (NumConsecutiveStores >= 2) {
20636 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20637 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20638 Align FirstStoreAlign = FirstInChain->getAlign();
20639 unsigned LastLegalType = 1;
20640 unsigned LastLegalVectorType = 1;
20641 bool LastIntegerTrunc = false;
20642 bool NonZero = false;
20643 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
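    // FirstZeroAfterNonZero records the first zero-valued store that follows a
    // non-zero one; when a merge attempt fails we only skip candidates up to
    // that point, since a run starting at the zero may still merge cheaply.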
20644 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20645 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20646 SDValue StoredVal = ST->getValue();
20647 bool IsElementZero = false;
20648 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20649 IsElementZero = C->isZero();
20650 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20651 IsElementZero = C->getConstantFPValue()->isNullValue();
20652 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20653 IsElementZero = true;
20654 if (IsElementZero) {
20655 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20656 FirstZeroAfterNonZero = i;
20657 }
20658 NonZero |= !IsElementZero;
20659
20660 // Find a legal type for the constant store.
20661 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20662 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20663 unsigned IsFast = 0;
20664
20665 // Break early when size is too large to be legal.
20666 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20667 break;
20668
20669 if (TLI.isTypeLegal(StoreTy) &&
20670 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20671 DAG.getMachineFunction()) &&
20672 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20673 *FirstInChain->getMemOperand(), &IsFast) &&
20674 IsFast) {
20675 LastIntegerTrunc = false;
20676 LastLegalType = i + 1;
20677 // Or check whether a truncstore is legal.
20678 } else if (TLI.getTypeAction(Context, StoreTy) ==
20679                 TargetLowering::TypePromoteInteger) {
20680        EVT LegalizedStoredValTy =
20681 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20682 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20683 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20684 DAG.getMachineFunction()) &&
20685 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20686 *FirstInChain->getMemOperand(), &IsFast) &&
20687 IsFast) {
20688 LastIntegerTrunc = true;
20689 LastLegalType = i + 1;
20690 }
20691 }
20692
20693 // We only use vectors if the target allows it and the function is not
20694 // marked with the noimplicitfloat attribute.
20695 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20696 AllowVectors) {
20697 // Find a legal type for the vector store.
20698 unsigned Elts = (i + 1) * NumMemElts;
20699 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20700 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20701 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20702 TLI.allowsMemoryAccess(Context, DL, Ty,
20703 *FirstInChain->getMemOperand(), &IsFast) &&
20704 IsFast)
20705 LastLegalVectorType = i + 1;
20706 }
20707 }
20708
20709 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20710 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20711 bool UseTrunc = LastIntegerTrunc && !UseVector;
20712
20713 // Check if we found a legal integer type that creates a meaningful
20714 // merge.
20715 if (NumElem < 2) {
20716 // We know that candidate stores are in order and of correct
20717 // shape. While there is no mergeable sequence from the
20718      // beginning, one may start later in the sequence. The only
20719      // reason a merge of size N could have failed where another of
20720      // the same size would not have is if the alignment has
20721 // improved or we've dropped a non-zero value. Drop as many
20722 // candidates as we can here.
20723 unsigned NumSkip = 1;
20724 while ((NumSkip < NumConsecutiveStores) &&
20725 (NumSkip < FirstZeroAfterNonZero) &&
20726 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20727 NumSkip++;
20728
20729 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20730 NumConsecutiveStores -= NumSkip;
20731 continue;
20732 }
20733
20734 // Check that we can merge these candidates without causing a cycle.
20735 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20736 RootNode)) {
20737 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20738 NumConsecutiveStores -= NumElem;
20739 continue;
20740 }
20741
20742 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20743 /*IsConstantSrc*/ true,
20744 UseVector, UseTrunc);
20745
20746 // Remove merged stores for next iteration.
20747 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20748 NumConsecutiveStores -= NumElem;
20749 }
20750 return MadeChange;
20751}
20752
20753bool DAGCombiner::tryStoreMergeOfExtracts(
20754 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20755 EVT MemVT, SDNode *RootNode) {
20756 LLVMContext &Context = *DAG.getContext();
20757 const DataLayout &DL = DAG.getDataLayout();
20758 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20759 bool MadeChange = false;
20760
20761 // Loop on Consecutive Stores on success.
20762 while (NumConsecutiveStores >= 2) {
20763 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20764 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20765 Align FirstStoreAlign = FirstInChain->getAlign();
20766 unsigned NumStoresToMerge = 1;
20767 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20768 // Find a legal type for the vector store.
20769 unsigned Elts = (i + 1) * NumMemElts;
20770 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20771 unsigned IsFast = 0;
20772
20773 // Break early when size is too large to be legal.
20774 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20775 break;
20776
20777 if (TLI.isTypeLegal(Ty) &&
20778 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20779 TLI.allowsMemoryAccess(Context, DL, Ty,
20780 *FirstInChain->getMemOperand(), &IsFast) &&
20781 IsFast)
20782 NumStoresToMerge = i + 1;
20783 }
20784
20785 // Check if we found a legal integer type creating a meaningful
20786 // merge.
20787 if (NumStoresToMerge < 2) {
20788 // We know that candidate stores are in order and of correct
20789 // shape. While there is no mergeable sequence from the
20790      // beginning, one may start later in the sequence. The only
20791      // reason a merge of size N could have failed where another of
20792      // the same size would not have is if the alignment has
20793 // improved. Drop as many candidates as we can here.
20794 unsigned NumSkip = 1;
20795 while ((NumSkip < NumConsecutiveStores) &&
20796 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20797 NumSkip++;
20798
20799 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20800 NumConsecutiveStores -= NumSkip;
20801 continue;
20802 }
20803
20804 // Check that we can merge these candidates without causing a cycle.
20805 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20806 RootNode)) {
20807 StoreNodes.erase(StoreNodes.begin(),
20808 StoreNodes.begin() + NumStoresToMerge);
20809 NumConsecutiveStores -= NumStoresToMerge;
20810 continue;
20811 }
20812
20813 MadeChange |= mergeStoresOfConstantsOrVecElts(
20814 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20815 /*UseVector*/ true, /*UseTrunc*/ false);
20816
20817 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20818 NumConsecutiveStores -= NumStoresToMerge;
20819 }
20820 return MadeChange;
20821}
20822
20823bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20824 unsigned NumConsecutiveStores, EVT MemVT,
20825 SDNode *RootNode, bool AllowVectors,
20826 bool IsNonTemporalStore,
20827 bool IsNonTemporalLoad) {
20828 LLVMContext &Context = *DAG.getContext();
20829 const DataLayout &DL = DAG.getDataLayout();
20830 int64_t ElementSizeBytes = MemVT.getStoreSize();
20831 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20832 bool MadeChange = false;
20833
20834 // Look for load nodes which are used by the stored values.
20835 SmallVector<MemOpLink, 8> LoadNodes;
20836
20837 // Find acceptable loads. Loads need to have the same chain (token factor),
20838 // must not be zext, volatile, indexed, and they must be consecutive.
20839 BaseIndexOffset LdBasePtr;
20840
20841 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20842 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20843    SDValue Val = peekThroughBitcasts(St->getValue());
20844    LoadSDNode *Ld = cast<LoadSDNode>(Val);
20845
20846 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20847 // If this is not the first ptr that we check.
20848 int64_t LdOffset = 0;
20849 if (LdBasePtr.getBase().getNode()) {
20850 // The base ptr must be the same.
20851 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20852 break;
20853 } else {
20854 // Check that all other base pointers are the same as this one.
20855 LdBasePtr = LdPtr;
20856 }
20857
20858 // We found a potential memory operand to merge.
20859 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20860 }
20861
20862 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20863 Align RequiredAlignment;
20864 bool NeedRotate = false;
20865 if (LoadNodes.size() == 2) {
20866 // If we have load/store pair instructions and we only have two values,
20867 // don't bother merging.
20868 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20869 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20870 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20871 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20872 break;
20873 }
20874 // If the loads are reversed, see if we can rotate the halves into place.
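      // For example (illustrative): two element stores fed by loads of the
      // same two elements in the opposite order can be covered by one wide
      // load plus a rotate by half the width, instead of two narrow loads.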
20875 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20876 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20877 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20878 if (Offset0 - Offset1 == ElementSizeBytes &&
20879 (hasOperation(ISD::ROTL, PairVT) ||
20880 hasOperation(ISD::ROTR, PairVT))) {
20881 std::swap(LoadNodes[0], LoadNodes[1]);
20882 NeedRotate = true;
20883 }
20884 }
20885 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20886 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20887 Align FirstStoreAlign = FirstInChain->getAlign();
20888 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20889
20890 // Scan the memory operations on the chain and find the first
20891 // non-consecutive load memory address. These variables hold the index in
20892 // the store node array.
20893
20894 unsigned LastConsecutiveLoad = 1;
20895
20896 // This variable refers to the size and not index in the array.
20897 unsigned LastLegalVectorType = 1;
20898 unsigned LastLegalIntegerType = 1;
20899 bool isDereferenceable = true;
20900 bool DoIntegerTruncate = false;
20901 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20902 SDValue LoadChain = FirstLoad->getChain();
20903 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20904 // All loads must share the same chain.
20905 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20906 break;
20907
20908 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20909 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20910 break;
20911 LastConsecutiveLoad = i;
20912
20913 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20914 isDereferenceable = false;
20915
20916 // Find a legal type for the vector store.
20917 unsigned Elts = (i + 1) * NumMemElts;
20918 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20919
20920 // Break early when size is too large to be legal.
20921 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20922 break;
20923
20924 unsigned IsFastSt = 0;
20925 unsigned IsFastLd = 0;
20926 // Don't try vector types if we need a rotate. We may still fail the
20927 // legality checks for the integer type, but we can't handle the rotate
20928 // case with vectors.
20929 // FIXME: We could use a shuffle in place of the rotate.
20930 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20931 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20932 DAG.getMachineFunction()) &&
20933 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20934 *FirstInChain->getMemOperand(), &IsFastSt) &&
20935 IsFastSt &&
20936 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20937 *FirstLoad->getMemOperand(), &IsFastLd) &&
20938 IsFastLd) {
20939 LastLegalVectorType = i + 1;
20940 }
20941
20942 // Find a legal type for the integer store.
20943 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20944 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20945 if (TLI.isTypeLegal(StoreTy) &&
20946 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20947 DAG.getMachineFunction()) &&
20948 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20949 *FirstInChain->getMemOperand(), &IsFastSt) &&
20950 IsFastSt &&
20951 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20952 *FirstLoad->getMemOperand(), &IsFastLd) &&
20953 IsFastLd) {
20954 LastLegalIntegerType = i + 1;
20955 DoIntegerTruncate = false;
20956 // Or check whether a truncstore and extload is legal.
20957 } else if (TLI.getTypeAction(Context, StoreTy) ==
20958                 TargetLowering::TypePromoteInteger) {
20959        EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20960 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20961 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20962 DAG.getMachineFunction()) &&
20963 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20964 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20965 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20966 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20967 *FirstInChain->getMemOperand(), &IsFastSt) &&
20968 IsFastSt &&
20969 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20970 *FirstLoad->getMemOperand(), &IsFastLd) &&
20971 IsFastLd) {
20972 LastLegalIntegerType = i + 1;
20973 DoIntegerTruncate = true;
20974 }
20975 }
20976 }
20977
20978 // Only use vector types if the vector type is larger than the integer
20979 // type. If they are the same, use integers.
20980 bool UseVectorTy =
20981 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20982 unsigned LastLegalType =
20983 std::max(LastLegalVectorType, LastLegalIntegerType);
20984
20985    // We add +1 here because the LastXXX variables refer to a position (index)
20986    // while NumElem refers to a count of elements.
20987 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20988 NumElem = std::min(LastLegalType, NumElem);
20989 Align FirstLoadAlign = FirstLoad->getAlign();
20990
20991 if (NumElem < 2) {
20992 // We know that candidate stores are in order and of correct
20993 // shape. While there is no mergeable sequence from the
20994      // beginning, one may start later in the sequence. The only
20995      // reason a merge of size N could have failed where another of
20996      // the same size would not have is if the alignment of either
20997 // the load or store has improved. Drop as many candidates as we
20998 // can here.
20999 unsigned NumSkip = 1;
21000 while ((NumSkip < LoadNodes.size()) &&
21001 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21002 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21003 NumSkip++;
21004 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21005 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21006 NumConsecutiveStores -= NumSkip;
21007 continue;
21008 }
21009
21010 // Check that we can merge these candidates without causing a cycle.
21011 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21012 RootNode)) {
21013 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21014 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21015 NumConsecutiveStores -= NumElem;
21016 continue;
21017 }
21018
21019 // Find if it is better to use vectors or integers to load and store
21020 // to memory.
21021 EVT JointMemOpVT;
21022 if (UseVectorTy) {
21023 // Find a legal type for the vector store.
21024 unsigned Elts = NumElem * NumMemElts;
21025 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21026 } else {
21027 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21028 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21029 }
21030
21031 SDLoc LoadDL(LoadNodes[0].MemNode);
21032 SDLoc StoreDL(StoreNodes[0].MemNode);
21033
21034 // The merged loads are required to have the same incoming chain, so
21035 // using the first's chain is acceptable.
21036
21037 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21038 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21039 AddToWorklist(NewStoreChain.getNode());
21040
21041 MachineMemOperand::Flags LdMMOFlags =
21042 isDereferenceable ? MachineMemOperand::MODereferenceable
21043                              : MachineMemOperand::MONone;
21044    if (IsNonTemporalLoad)
21045      LdMMOFlags |= MachineMemOperand::MONonTemporal;
21046
21047 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21048
21049 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21050                                              ? MachineMemOperand::MONonTemporal
21051                                              : MachineMemOperand::MONone;
21052 
21053 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21054
21055 SDValue NewLoad, NewStore;
21056 if (UseVectorTy || !DoIntegerTruncate) {
21057 NewLoad = DAG.getLoad(
21058 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21059 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21060 SDValue StoreOp = NewLoad;
21061 if (NeedRotate) {
21062 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21063 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21064 "Unexpected type for rotate-able load pair");
21065 SDValue RotAmt =
21066 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21067 // Target can convert to the identical ROTR if it does not have ROTL.
21068 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21069 }
21070 NewStore = DAG.getStore(
21071 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21072 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21073 : MachinePointerInfo(FirstStoreAS),
21074 FirstStoreAlign, StMMOFlags);
21075 } else { // This must be the truncstore/extload case
21076 EVT ExtendedTy =
21077 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21078 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21079 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21080 FirstLoad->getPointerInfo(), JointMemOpVT,
21081 FirstLoadAlign, LdMMOFlags);
21082 NewStore = DAG.getTruncStore(
21083 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21084 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21085 : MachinePointerInfo(FirstStoreAS),
21086 JointMemOpVT, FirstInChain->getAlign(),
21087 FirstInChain->getMemOperand()->getFlags());
21088 }
21089
21090 // Transfer chain users from old loads to the new load.
21091 for (unsigned i = 0; i < NumElem; ++i) {
21092 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21093 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21094 SDValue(NewLoad.getNode(), 1));
21095 }
21096
21097 // Replace all stores with the new store. Recursively remove corresponding
21098 // values if they are no longer used.
21099 for (unsigned i = 0; i < NumElem; ++i) {
21100 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21101 CombineTo(StoreNodes[i].MemNode, NewStore);
21102 if (Val->use_empty())
21103 recursivelyDeleteUnusedNodes(Val.getNode());
21104 }
21105
21106 MadeChange = true;
21107 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21108 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21109 NumConsecutiveStores -= NumElem;
21110 }
21111 return MadeChange;
21112}
21113
21114bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21115 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21116 return false;
21117
21118 // TODO: Extend this function to merge stores of scalable vectors.
21119 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21120 // store since we know <vscale x 16 x i8> is exactly twice as large as
21121 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21122 EVT MemVT = St->getMemoryVT();
21123 if (MemVT.isScalableVT())
21124 return false;
21125 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21126 return false;
21127
21128 // This function cannot currently deal with non-byte-sized memory sizes.
21129 int64_t ElementSizeBytes = MemVT.getStoreSize();
21130 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21131 return false;
21132
21133 // Do not bother looking at stored values that are not constants, loads, or
21134 // extracted vector elements.
21135 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21136 const StoreSource StoreSrc = getStoreSource(StoredVal);
21137 if (StoreSrc == StoreSource::Unknown)
21138 return false;
21139
21140 SmallVector<MemOpLink, 8> StoreNodes;
21141 SDNode *RootNode;
21142 // Find potential store merge candidates by searching through chain sub-DAG
21143 getStoreMergeCandidates(St, StoreNodes, RootNode);
21144
21145 // Check if there is anything to merge.
21146 if (StoreNodes.size() < 2)
21147 return false;
21148
21149 // Sort the memory operands according to their distance from the
21150 // base pointer.
21151 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21152 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21153 });
21154
21155 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21156 Attribute::NoImplicitFloat);
21157 bool IsNonTemporalStore = St->isNonTemporal();
21158 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21159 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21160
21161 // Store merging attempts to merge the lowest stores first. This generally
21162 // works out well: when the merge succeeds, the remaining stores are
21163 // checked after the first collection of stores is merged. However, if a
21164 // non-mergeable store is found first, e.g., {p[-2],
21165 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21166 // mergeable cases. To prevent this, we prune such stores from the
21167 // front of StoreNodes here.
21168 bool MadeChange = false;
21169 while (StoreNodes.size() > 1) {
21170 unsigned NumConsecutiveStores =
21171 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21172 // There are no more stores in the list to examine.
21173 if (NumConsecutiveStores == 0)
21174 return MadeChange;
21175
21176 // We have at least 2 consecutive stores. Try to merge them.
21177 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21178 switch (StoreSrc) {
21179 case StoreSource::Constant:
21180 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21181 MemVT, RootNode, AllowVectors);
21182 break;
21183
21184 case StoreSource::Extract:
21185 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21186 MemVT, RootNode);
21187 break;
21188
21189 case StoreSource::Load:
21190 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21191 MemVT, RootNode, AllowVectors,
21192 IsNonTemporalStore, IsNonTemporalLoad);
21193 break;
21194
21195 default:
21196 llvm_unreachable("Unhandled store source type");
21197 }
21198 }
21199 return MadeChange;
21200}
21201
21202SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21203 SDLoc SL(ST);
21204 SDValue ReplStore;
21205
21206 // Replace the chain to avoid dependency.
21207 if (ST->isTruncatingStore()) {
21208 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21209 ST->getBasePtr(), ST->getMemoryVT(),
21210 ST->getMemOperand());
21211 } else {
21212 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21213 ST->getMemOperand());
21214 }
21215
21216 // Create token to keep both nodes around.
21217 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21218 MVT::Other, ST->getChain(), ReplStore);
21219
21220 // Make sure the new and old chains are cleaned up.
21221 AddToWorklist(Token.getNode());
21222
21223 // Don't add users to work list.
21224 return CombineTo(ST, Token, false);
21225}
21226
21227SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21228 SDValue Value = ST->getValue();
21229 if (Value.getOpcode() == ISD::TargetConstantFP)
21230 return SDValue();
21231
21232 if (!ISD::isNormalStore(ST))
21233 return SDValue();
21234
21235 SDLoc DL(ST);
21236
21237 SDValue Chain = ST->getChain();
21238 SDValue Ptr = ST->getBasePtr();
21239
21240 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21241
21242 // NOTE: If the original store is volatile, this transform must not increase
21243 // the number of stores. For example, on x86-32 an f64 can be stored in one
21244 // processor operation but an i64 (which is not legal) requires two. So the
21245 // transform should not be done in this case.
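// For example, 'store float 1.0, ptr' can become 'store i32 0x3F800000, ptr'
// when i32 stores are legal, and an f64 constant can be stored as a single
// i64 or split into two i32 stores.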
21246
21247 SDValue Tmp;
21248 switch (CFP->getSimpleValueType(0).SimpleTy) {
21249 default:
21250 llvm_unreachable("Unknown FP type");
21251 case MVT::f16: // We don't do this for these yet.
21252 case MVT::bf16:
21253 case MVT::f80:
21254 case MVT::f128:
21255 case MVT::ppcf128:
21256 return SDValue();
21257 case MVT::f32:
21258 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21259 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21260 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21261 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21262 MVT::i32);
21263 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21264 }
21265
21266 return SDValue();
21267 case MVT::f64:
21268 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21269 ST->isSimple()) ||
21270 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21271 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21272 getZExtValue(), SDLoc(CFP), MVT::i64);
21273 return DAG.getStore(Chain, DL, Tmp,
21274 Ptr, ST->getMemOperand());
21275 }
21276
21277 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21278 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21279 // Many FP stores are not made apparent until after legalize, e.g. for
21280 // argument passing. Since this is so common, custom legalize the
21281 // 64-bit integer store into two 32-bit stores.
21282 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21283 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21284 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21285 if (DAG.getDataLayout().isBigEndian())
21286 std::swap(Lo, Hi);
21287
21288 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21289 AAMDNodes AAInfo = ST->getAAInfo();
21290
21291 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21292 ST->getOriginalAlign(), MMOFlags, AAInfo);
21293 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21294 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21295 ST->getPointerInfo().getWithOffset(4),
21296 ST->getOriginalAlign(), MMOFlags, AAInfo);
21297 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21298 St0, St1);
21299 }
21300
21301 return SDValue();
21302 }
21303}
21304
21305// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21306//
21307 // If a store stores a load with a single element inserted into it, and the
21308 // load has no other uses in between on the chain, then we can consider the
21309 // vector store dead and replace it with just the single scalar element store.
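// For example, with a v4i32 load at %p and a constant element index of 2:
//   (store (insert_vector_elt (load %p), %x, 2), %p) --> (store %x, %p+8)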
21310SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21311 SDLoc DL(ST);
21312 SDValue Value = ST->getValue();
21313 SDValue Ptr = ST->getBasePtr();
21314 SDValue Chain = ST->getChain();
21315 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21316 return SDValue();
21317
21318 SDValue Elt = Value.getOperand(1);
21319 SDValue Idx = Value.getOperand(2);
21320
21321 // If the element isn't byte sized or is implicitly truncated then we can't
21322 // compute an offset.
21323 EVT EltVT = Elt.getValueType();
21324 if (!EltVT.isByteSized() ||
21325 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21326 return SDValue();
21327
21328 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21329 if (!Ld || Ld->getBasePtr() != Ptr ||
21330 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21331 !ISD::isNormalStore(ST) ||
21332 Ld->getAddressSpace() != ST->getAddressSpace() ||
21333 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21334 return SDValue();
21335
21336 unsigned IsFast;
21337 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21338 Elt.getValueType(), ST->getAddressSpace(),
21339 ST->getAlign(), ST->getMemOperand()->getFlags(),
21340 &IsFast) ||
21341 !IsFast)
21342 return SDValue();
21343
21344 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21345
21346 // If the offset is a known constant then try to recover the pointer
21347 // info
21348 SDValue NewPtr;
21349 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21350 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21351 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21352 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21353 } else {
21354 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21355 }
21356
21357 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21358 ST->getMemOperand()->getFlags());
21359}
21360
21361SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21362 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21363 SDValue Val = ST->getVal();
21364 EVT VT = Val.getValueType();
21365 EVT MemVT = ST->getMemoryVT();
21366
21367 if (MemVT.bitsLT(VT)) { // Is truncating store
21368 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21369 MemVT.getScalarSizeInBits());
21370 // See if we can simplify the operation with SimplifyDemandedBits, which
21371 // only works if the value has a single use.
21372 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21373 return SDValue(N, 0);
21374 }
21375
21376 return SDValue();
21377}
21378
21379SDValue DAGCombiner::visitSTORE(SDNode *N) {
21380 StoreSDNode *ST = cast<StoreSDNode>(N);
21381 SDValue Chain = ST->getChain();
21382 SDValue Value = ST->getValue();
21383 SDValue Ptr = ST->getBasePtr();
21384
21385 // If this is a store of a bit convert, store the input value if the
21386 // resultant store does not need a higher alignment than the original.
21387 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21388 ST->isUnindexed()) {
21389 EVT SVT = Value.getOperand(0).getValueType();
21390 // If the store is volatile, we only want to change the store type if the
21391 // resulting store is legal. Otherwise we might increase the number of
21392 // memory accesses. We don't care if the original type was legal or not
21393 // as we assume software couldn't rely on the number of accesses of an
21394 // illegal type.
21395 // TODO: May be able to relax for unordered atomics (see D66309)
21396 if (((!LegalOperations && ST->isSimple()) ||
21397 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21398 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21399 DAG, *ST->getMemOperand())) {
21400 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21401 ST->getMemOperand());
21402 }
21403 }
21404
21405 // Turn 'store undef, Ptr' -> nothing.
21406 if (Value.isUndef() && ST->isUnindexed())
21407 return Chain;
21408
21409 // Try to infer better alignment information than the store already has.
21410 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21411 !ST->isAtomic()) {
21412 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21413 if (*Alignment > ST->getAlign() &&
21414 isAligned(*Alignment, ST->getSrcValueOffset())) {
21415 SDValue NewStore =
21416 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21417 ST->getMemoryVT(), *Alignment,
21418 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21419 // NewStore will always be N as we are only refining the alignment
21420 assert(NewStore.getNode() == N);
21421 (void)NewStore;
21422 }
21423 }
21424 }
21425
21426 // Try transforming a pair floating point load / store ops to integer
21427 // load / store ops.
21428 if (SDValue NewST = TransformFPLoadStorePair(N))
21429 return NewST;
21430
21431 // Try transforming several stores into STORE (BSWAP).
21432 if (SDValue Store = mergeTruncStores(ST))
21433 return Store;
21434
21435 if (ST->isUnindexed()) {
21436 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21437 // adjacent stores.
21438 if (findBetterNeighborChains(ST)) {
21439 // replaceStoreChain uses CombineTo, which handled all of the worklist
21440 // manipulation. Return the original node to not do anything else.
21441 return SDValue(ST, 0);
21442 }
21443 Chain = ST->getChain();
21444 }
21445
21446 // FIXME: is there such a thing as a truncating indexed store?
21447 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21448 Value.getValueType().isInteger() &&
21449 (!isa<ConstantSDNode>(Value) ||
21450 !cast<ConstantSDNode>(Value)->isOpaque())) {
21451 // Convert a truncating store of a extension into a standard store.
21452 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21453 Value.getOpcode() == ISD::SIGN_EXTEND ||
21454 Value.getOpcode() == ISD::ANY_EXTEND) &&
21455 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21456 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21457 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21458 ST->getMemOperand());
21459
21460 APInt TruncDemandedBits =
21461 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21462 ST->getMemoryVT().getScalarSizeInBits());
21463
21464 // See if we can simplify the operation with SimplifyDemandedBits, which
21465 // only works if the value has a single use.
21466 AddToWorklist(Value.getNode());
21467 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21468 // Re-visit the store if anything changed and the store hasn't been merged
21469 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21470 // node back to the worklist if necessary, but we also need to re-visit
21471 // the Store node itself.
21472 if (N->getOpcode() != ISD::DELETED_NODE)
21473 AddToWorklist(N);
21474 return SDValue(N, 0);
21475 }
21476
21477 // Otherwise, see if we can simplify the input to this truncstore with
21478 // knowledge that only the low bits are being used. For example:
21479 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21480 if (SDValue Shorter =
21481 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21482 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21483 ST->getMemOperand());
21484
21485 // If we're storing a truncated constant, see if we can simplify it.
21486 // TODO: Move this to targetShrinkDemandedConstant?
21487 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21488 if (!Cst->isOpaque()) {
21489 const APInt &CValue = Cst->getAPIntValue();
21490 APInt NewVal = CValue & TruncDemandedBits;
21491 if (NewVal != CValue) {
21492 SDValue Shorter =
21493 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21494 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21495 ST->getMemoryVT(), ST->getMemOperand());
21496 }
21497 }
21498 }
21499
21500 // If this is a load followed by a store to the same location, then the store
21501 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21502 // TODO: Add big-endian truncate support with test coverage.
21503 // TODO: Can relax for unordered atomics (see D66309)
21504 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21505 ? peekThroughTruncates(Value)
21506 : Value;
21507 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21508 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21509 ST->isUnindexed() && ST->isSimple() &&
21510 Ld->getAddressSpace() == ST->getAddressSpace() &&
21511 // There can't be any side effects between the load and store, such as
21512 // a call or store.
21513 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21514 // The store is dead, remove it.
21515 return Chain;
21516 }
21517 }
21518
21519 // Try scalarizing vector stores of loads where we only change one element
21520 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21521 return NewST;
21522
21523 // TODO: Can relax for unordered atomics (see D66309)
21524 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21525 if (ST->isUnindexed() && ST->isSimple() &&
21526 ST1->isUnindexed() && ST1->isSimple()) {
21527 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21528 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21529 ST->getAddressSpace() == ST1->getAddressSpace()) {
21530 // If this is a store followed by a store with the same value to the
21531 // same location, then the store is dead/noop.
21532 return Chain;
21533 }
21534
21535 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21536 !ST1->getBasePtr().isUndef() &&
21537 ST->getAddressSpace() == ST1->getAddressSpace()) {
21538 // If one of the two stores has a scalable vector type and the other
21539 // is a larger store with a fixed type, we cannot allow removing the
21540 // scalable store because we don't know its final size until
21541 // runtime.
21542 if (ST->getMemoryVT().isScalableVector() ||
21543 ST1->getMemoryVT().isScalableVector()) {
21544 if (ST1->getBasePtr() == Ptr &&
21545 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21546 ST->getMemoryVT().getStoreSize())) {
21547 CombineTo(ST1, ST1->getChain());
21548 return SDValue(N, 0);
21549 }
21550 } else {
21551 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21552 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21553 // If the preceding store writes to a subset of this store's
21554 // location and no other node is chained to that store, we can
21555 // effectively drop the preceding store. Do not remove stores to undef
21556 // as they may be used as data sinks.
21557 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21558 ChainBase,
21559 ST1->getMemoryVT().getFixedSizeInBits())) {
21560 CombineTo(ST1, ST1->getChain());
21561 return SDValue(N, 0);
21562 }
21563 }
21564 }
21565 }
21566 }
21567
21568 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21569 // truncating store. We can do this even if this is already a truncstore.
21570 if ((Value.getOpcode() == ISD::FP_ROUND ||
21571 Value.getOpcode() == ISD::TRUNCATE) &&
21572 Value->hasOneUse() && ST->isUnindexed() &&
21573 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21574 ST->getMemoryVT(), LegalOperations)) {
21575 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21576 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21577 }
21578
21579 // Always perform this optimization before types are legal. If the target
21580 // prefers, also try this after legalization to catch stores that were created
21581 // by intrinsics or other nodes.
21582 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21583 while (true) {
21584 // There can be multiple store sequences on the same chain.
21585 // Keep trying to merge store sequences until we are unable to do so
21586 // or until we merge the last store on the chain.
21587 bool Changed = mergeConsecutiveStores(ST);
21588 if (!Changed) break;
21589 // Return N as merge only uses CombineTo and no worklist clean
21590 // up is necessary.
21591 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21592 return SDValue(N, 0);
21593 }
21594 }
21595
21596 // Try transforming N to an indexed store.
21597 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21598 return SDValue(N, 0);
21599
21600 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21601 //
21602 // Make sure to do this only after attempting to merge stores in order to
21603 // avoid changing the types of some subset of stores due to visit order,
21604 // preventing their merging.
21605 if (isa<ConstantFPSDNode>(ST->getValue())) {
21606 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21607 return NewSt;
21608 }
21609
21610 if (SDValue NewSt = splitMergedValStore(ST))
21611 return NewSt;
21612
21613 return ReduceLoadOpStoreWidth(N);
21614}
21615
21616SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21617 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21618 if (!LifetimeEnd->hasOffset())
21619 return SDValue();
21620
21621 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21622 LifetimeEnd->getOffset(), false);
21623
21624 // We walk up the chains to find stores.
21625 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21626 while (!Chains.empty()) {
21627 SDValue Chain = Chains.pop_back_val();
21628 if (!Chain.hasOneUse())
21629 continue;
21630 switch (Chain.getOpcode()) {
21631 case ISD::TokenFactor:
21632 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21633 Chains.push_back(Chain.getOperand(--Nops));
21634 break;
21635 case ISD::LIFETIME_START:
21636 case ISD::LIFETIME_END:
21637 // We can forward past any lifetime start/end that can be proven not to
21638 // alias the node.
21639 if (!mayAlias(Chain.getNode(), N))
21640 Chains.push_back(Chain.getOperand(0));
21641 break;
21642 case ISD::STORE: {
21643 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21644 // TODO: Can relax for unordered atomics (see D66309)
21645 if (!ST->isSimple() || ST->isIndexed())
21646 continue;
21647 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21648 // The bounds of a scalable store are not known until runtime, so this
21649 // store cannot be elided.
21650 if (StoreSize.isScalable())
21651 continue;
21652 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21653 // If we store purely within object bounds just before its lifetime ends,
21654 // we can remove the store.
21655 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21656 StoreSize.getFixedValue() * 8)) {
21657 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21658 dbgs() << "\nwithin LIFETIME_END of : ";
21659 LifetimeEndBase.dump(); dbgs() << "\n");
21660 CombineTo(ST, ST->getChain());
21661 return SDValue(N, 0);
21662 }
21663 }
21664 }
21665 }
21666 return SDValue();
21667}
21668
21669/// For the instruction sequence of store below, F and I values
21670/// are bundled together as an i64 value before being stored into memory.
21671 /// Sometimes it is more efficient to generate separate stores for F and I,
21672/// which can remove the bitwise instructions or sink them to colder places.
21673///
21674/// (store (or (zext (bitcast F to i32) to i64),
21675/// (shl (zext I to i64), 32)), addr) -->
21676/// (store F, addr) and (store I, addr+4)
21677///
21678/// Similarly, splitting for other merged store can also be beneficial, like:
21679/// For pair of {i32, i32}, i64 store --> two i32 stores.
21680/// For pair of {i32, i16}, i64 store --> two i32 stores.
21681/// For pair of {i16, i16}, i32 store --> two i16 stores.
21682/// For pair of {i16, i8}, i32 store --> two i16 stores.
21683/// For pair of {i8, i8}, i16 store --> two i8 stores.
21684///
21685/// We allow each target to determine specifically which kind of splitting is
21686/// supported.
21687///
21688/// The store patterns are commonly seen from the simple code snippet below
21689 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21690/// void goo(const std::pair<int, float> &);
21691/// hoo() {
21692/// ...
21693/// goo(std::make_pair(tmp, ftmp));
21694/// ...
21695/// }
21696///
21697SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21698 if (OptLevel == CodeGenOptLevel::None)
21699 return SDValue();
21700
21701 // Can't change the number of memory accesses for a volatile store or break
21702 // atomicity for an atomic one.
21703 if (!ST->isSimple())
21704 return SDValue();
21705
21706 SDValue Val = ST->getValue();
21707 SDLoc DL(ST);
21708
21709 // Match OR operand.
21710 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21711 return SDValue();
21712
21713 // Match SHL operand and get Lower and Higher parts of Val.
21714 SDValue Op1 = Val.getOperand(0);
21715 SDValue Op2 = Val.getOperand(1);
21716 SDValue Lo, Hi;
21717 if (Op1.getOpcode() != ISD::SHL) {
21718 std::swap(Op1, Op2);
21719 if (Op1.getOpcode() != ISD::SHL)
21720 return SDValue();
21721 }
21722 Lo = Op2;
21723 Hi = Op1.getOperand(0);
21724 if (!Op1.hasOneUse())
21725 return SDValue();
21726
21727 // Match shift amount to HalfValBitSize.
21728 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21729 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21730 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21731 return SDValue();
21732
21733 // Lo and Hi must be zero-extended from integer types no wider than
21734 // HalfValBitSize.
21735 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21736 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21737 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21738 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21739 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21740 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21741 return SDValue();
21742
21743 // Use the EVT of low and high parts before bitcast as the input
21744 // of target query.
21745 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21746 ? Lo.getOperand(0).getValueType()
21747 : Lo.getValueType();
21748 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21749 ? Hi.getOperand(0).getValueType()
21750 : Hi.getValueType();
21751 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21752 return SDValue();
21753
21754 // Start to split store.
21755 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21756 AAMDNodes AAInfo = ST->getAAInfo();
21757
21758 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21759 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21760 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21761 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21762
21763 SDValue Chain = ST->getChain();
21764 SDValue Ptr = ST->getBasePtr();
21765 // Lower value store.
21766 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21767 ST->getOriginalAlign(), MMOFlags, AAInfo);
21768 Ptr =
21769 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21770 // Higher value store.
21771 SDValue St1 = DAG.getStore(
21772 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21773 ST->getOriginalAlign(), MMOFlags, AAInfo);
21774 return St1;
21775}
21776
21777// Merge an insertion into an existing shuffle:
21778// (insert_vector_elt (vector_shuffle X, Y, Mask),
21779// .(extract_vector_elt X, N), InsIndex)
21780// --> (vector_shuffle X, Y, NewMask)
21781// and variations where shuffle operands may be CONCAT_VECTORS.
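// For example, inserting (extract_vector_elt X, 3) at index 1 of
// (vector_shuffle X, Y, <0,4,2,6>) only needs the mask rewritten to
// <0,3,2,6>.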
21782 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21783 SmallVectorImpl<int> &NewMask, SDValue Elt,
21784 unsigned InsIndex) {
21785 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21786 !isa<ConstantSDNode>(Elt.getOperand(1)))
21787 return false;
21788
21789 // Vec's operand 0 is using indices from 0 to N-1 and
21790 // operand 1 from N to 2N - 1, where N is the number of
21791 // elements in the vectors.
21792 SDValue InsertVal0 = Elt.getOperand(0);
21793 int ElementOffset = -1;
21794
21795 // We explore the inputs of the shuffle in order to see if we find the
21796 // source of the extract_vector_elt. If so, we can use it to modify the
21797 // shuffle rather than perform an insert_vector_elt.
21798 SmallVector<std::pair<int, SDValue>> ArgWorkList;
21799 ArgWorkList.emplace_back(Mask.size(), Y);
21800 ArgWorkList.emplace_back(0, X);
21801
21802 while (!ArgWorkList.empty()) {
21803 int ArgOffset;
21804 SDValue ArgVal;
21805 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21806
21807 if (ArgVal == InsertVal0) {
21808 ElementOffset = ArgOffset;
21809 break;
21810 }
21811
21812 // Peek through concat_vector.
21813 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21814 int CurrentArgOffset =
21815 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21816 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21817 for (SDValue Op : reverse(ArgVal->ops())) {
21818 CurrentArgOffset -= Step;
21819 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21820 }
21821
21822 // Make sure we went through all the elements and did not screw up index
21823 // computation.
21824 assert(CurrentArgOffset == ArgOffset);
21825 }
21826 }
21827
21828 // If we failed to find a match, see if we can replace an UNDEF shuffle
21829 // operand.
21830 if (ElementOffset == -1) {
21831 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21832 return false;
21833 ElementOffset = Mask.size();
21834 Y = InsertVal0;
21835 }
21836
21837 NewMask.assign(Mask.begin(), Mask.end());
21838 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21839 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21840 "NewMask[InsIndex] is out of bound");
21841 return true;
21842}
21843
21844// Merge an insertion into an existing shuffle:
21845// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21846// InsIndex)
21847// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21848// CONCAT_VECTORS.
21849SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21850 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21851 "Expected insert_vector_elt");
21852 SDValue InsertVal = N->getOperand(1);
21853 SDValue Vec = N->getOperand(0);
21854
21855 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21856 if (!SVN || !Vec.hasOneUse())
21857 return SDValue();
21858
21859 ArrayRef<int> Mask = SVN->getMask();
21860 SDValue X = Vec.getOperand(0);
21861 SDValue Y = Vec.getOperand(1);
21862
21863 SmallVector<int, 16> NewMask(Mask);
21864 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21865 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21866 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21867 if (LegalShuffle)
21868 return LegalShuffle;
21869 }
21870
21871 return SDValue();
21872}
21873
21874// Convert a disguised subvector insertion into a shuffle:
21875// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21876// bitcast(shuffle (bitcast V), (extended X), Mask)
21877// Note: We do not use an insert_subvector node because that requires a
21878// legal subvector type.
21879SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21880 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21881 "Expected insert_vector_elt");
21882 SDValue InsertVal = N->getOperand(1);
21883
21884 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21885 !InsertVal.getOperand(0).getValueType().isVector())
21886 return SDValue();
21887
21888 SDValue SubVec = InsertVal.getOperand(0);
21889 SDValue DestVec = N->getOperand(0);
21890 EVT SubVecVT = SubVec.getValueType();
21891 EVT VT = DestVec.getValueType();
21892 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21893 // If the source only has a single vector element, the cost of creating the
21894 // shuffle is likely to exceed the cost of an insert_vector_elt.
21895 if (NumSrcElts == 1)
21896 return SDValue();
21897 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21898 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21899
21900 // Step 1: Create a shuffle mask that implements this insert operation. The
21901 // vector that we are inserting into will be operand 0 of the shuffle, so
21902 // those elements are just 'i'. The inserted subvector is in the first
21903 // positions of operand 1 of the shuffle. Example:
21904 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21905 SmallVector<int, 16> Mask(NumMaskVals);
21906 for (unsigned i = 0; i != NumMaskVals; ++i) {
21907 if (i / NumSrcElts == InsIndex)
21908 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21909 else
21910 Mask[i] = i;
21911 }
21912
21913 // Bail out if the target can not handle the shuffle we want to create.
21914 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21915 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21916 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21917 return SDValue();
21918
21919 // Step 2: Create a wide vector from the inserted source vector by appending
21920 // undefined elements. This is the same size as our destination vector.
21921 SDLoc DL(N);
21922 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21923 ConcatOps[0] = SubVec;
21924 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21925
21926 // Step 3: Shuffle in the padded subvector.
21927 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21928 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21929 AddToWorklist(PaddedSubV.getNode());
21930 AddToWorklist(DestVecBC.getNode());
21931 AddToWorklist(Shuf.getNode());
21932 return DAG.getBitcast(VT, Shuf);
21933}
21934
21935 // Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load when
21936 // possible and the new load will be fast. We use more loads but fewer shuffles
21937 // and inserts.
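// For example, with v4i32, a shuffle mask of <u,0,1,2> and an insert into
// lane 0 from a scalar load 4 bytes below the vector load, the whole pattern
// can be replaced by one v4i32 load from the scalar load's address.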
21938SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21939 EVT VT = N->getValueType(0);
21940
21941 // InsIndex is expected to be the first or last lane.
21942 if (!VT.isFixedLengthVector() ||
21943 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21944 return SDValue();
21945
21946 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21947 // depending on the InsIndex.
21948 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21949 SDValue Scalar = N->getOperand(1);
21950 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21951 return InsIndex == P.index() || P.value() < 0 ||
21952 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21953 (InsIndex == VT.getVectorNumElements() - 1 &&
21954 P.value() == (int)P.index() + 1);
21955 }))
21956 return SDValue();
21957
21958 // We optionally skip over an extend so long as both loads are extended in the
21959 // same way from the same type.
21960 unsigned Extend = 0;
21961 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21962 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21963 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21964 Extend = Scalar.getOpcode();
21965 Scalar = Scalar.getOperand(0);
21966 }
21967
21968 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21969 if (!ScalarLoad)
21970 return SDValue();
21971
21972 SDValue Vec = Shuffle->getOperand(0);
21973 if (Extend) {
21974 if (Vec.getOpcode() != Extend)
21975 return SDValue();
21976 Vec = Vec.getOperand(0);
21977 }
21978 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21979 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21980 return SDValue();
21981
21982 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21983 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21984 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21985 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21986 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21987 return SDValue();
21988
21989 // Check the offset between the pointers to verify they produce a single
21990 // contiguous load.
21991 if (InsIndex == 0) {
21992 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21993 -1))
21994 return SDValue();
21995 } else {
21996 if (!DAG.areNonVolatileConsecutiveLoads(
21997 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21998 return SDValue();
21999 }
22000
22001 // And that the new unaligned load will be fast.
22002 unsigned IsFast = 0;
22003 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22004 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22005 Vec.getValueType(), VecLoad->getAddressSpace(),
22006 NewAlign, VecLoad->getMemOperand()->getFlags(),
22007 &IsFast) ||
22008 !IsFast)
22009 return SDValue();
22010
22011 // Calculate the new Ptr and create the new load.
22012 SDLoc DL(N);
22013 SDValue Ptr = ScalarLoad->getBasePtr();
22014 if (InsIndex != 0)
22015 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22016 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22017 MachinePointerInfo PtrInfo =
22018 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22019 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22020
22021 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22022 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22023 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22024 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22025 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22026}
22027
22028SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22029 SDValue InVec = N->getOperand(0);
22030 SDValue InVal = N->getOperand(1);
22031 SDValue EltNo = N->getOperand(2);
22032 SDLoc DL(N);
22033
22034 EVT VT = InVec.getValueType();
22035 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22036
22037 // Insert into out-of-bounds element is undefined.
22038 if (IndexC && VT.isFixedLengthVector() &&
22039 IndexC->getZExtValue() >= VT.getVectorNumElements())
22040 return DAG.getUNDEF(VT);
22041
22042 // Remove redundant insertions:
22043 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22044 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22045 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22046 return InVec;
22047
22048 if (!IndexC) {
22049 // If this is variable insert to undef vector, it might be better to splat:
22050 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22051 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22052 return DAG.getSplat(VT, DL, InVal);
22053 return SDValue();
22054 }
22055
22056 if (VT.isScalableVector())
22057 return SDValue();
22058
22059 unsigned NumElts = VT.getVectorNumElements();
22060
22061 // We must know which element is being inserted for folds below here.
22062 unsigned Elt = IndexC->getZExtValue();
22063
22064 // Handle <1 x ???> vector insertion special cases.
22065 if (NumElts == 1) {
22066 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22067 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22068 InVal.getOperand(0).getValueType() == VT &&
22069 isNullConstant(InVal.getOperand(1)))
22070 return InVal.getOperand(0);
22071 }
22072
22073 // Canonicalize insert_vector_elt dag nodes.
22074 // Example:
22075 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22076 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22077 //
22078 // Do this only if the child insert_vector node has one use; also
22079 // do this only if indices are both constants and Idx1 < Idx0.
22080 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22081 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22082 unsigned OtherElt = InVec.getConstantOperandVal(2);
22083 if (Elt < OtherElt) {
22084 // Swap nodes.
22085 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22086 InVec.getOperand(0), InVal, EltNo);
22087 AddToWorklist(NewOp.getNode());
22088 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22089 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22090 }
22091 }
22092
22093 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22094 return Shuf;
22095
22096 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22097 return Shuf;
22098
22099 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22100 return Shuf;
22101
22102 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
22103 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22104 // vXi1 vector - we don't need to recurse.
22105 if (NumElts == 1)
22106 return DAG.getBuildVector(VT, DL, {InVal});
22107
22108 // If we haven't already collected the element, insert into the op list.
22109 EVT MaxEltVT = InVal.getValueType();
22110 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22111 unsigned Idx) {
22112 if (!Ops[Idx]) {
22113 Ops[Idx] = Elt;
22114 if (VT.isInteger()) {
22115 EVT EltVT = Elt.getValueType();
22116 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22117 }
22118 }
22119 };
22120
22121 // Ensure all the operands are the same value type, fill any missing
22122 // operands with UNDEF and create the BUILD_VECTOR.
22123 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22124 assert(Ops.size() == NumElts && "Unexpected vector size");
22125 for (SDValue &Op : Ops) {
22126 if (Op)
22127 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22128 else
22129 Op = DAG.getUNDEF(MaxEltVT);
22130 }
22131 return DAG.getBuildVector(VT, DL, Ops);
22132 };
22133
22134 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22135 Ops[Elt] = InVal;
22136
22137 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22138 for (SDValue CurVec = InVec; CurVec;) {
22139 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22140 if (CurVec.isUndef())
22141 return CanonicalizeBuildVector(Ops);
22142
22143 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22144 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22145 for (unsigned I = 0; I != NumElts; ++I)
22146 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22147 return CanonicalizeBuildVector(Ops);
22148 }
22149
22150 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22151 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22152 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22153 return CanonicalizeBuildVector(Ops);
22154 }
22155
22156 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22157 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22158 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22159 if (CurIdx->getAPIntValue().ult(NumElts)) {
22160 unsigned Idx = CurIdx->getZExtValue();
22161 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22162
22163 // Found entire BUILD_VECTOR.
22164 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22165 return CanonicalizeBuildVector(Ops);
22166
22167 CurVec = CurVec->getOperand(0);
22168 continue;
22169 }
22170
22171 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22172 // update the shuffle mask (and second operand if we started with unary
22173 // shuffle) and create a new legal shuffle.
22174 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22175 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22176 SDValue LHS = SVN->getOperand(0);
22177 SDValue RHS = SVN->getOperand(1);
22178 SmallVector<int, 16> Mask(SVN->getMask());
22179 bool Merged = true;
22180 for (auto I : enumerate(Ops)) {
22181 SDValue &Op = I.value();
22182 if (Op) {
22183 SmallVector<int, 16> NewMask;
22184 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22185 Merged = false;
22186 break;
22187 }
22188 Mask = std::move(NewMask);
22189 }
22190 }
22191 if (Merged)
22192 if (SDValue NewShuffle =
22193 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22194 return NewShuffle;
22195 }
22196
22197 // If all insertions are zero value, try to convert to AND mask.
22198 // TODO: Do this for -1 with OR mask?
22199 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22200 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22201 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22202 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22203 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22204 SmallVector<SDValue, 8> Mask(NumElts);
22205 for (unsigned I = 0; I != NumElts; ++I)
22206 Mask[I] = Ops[I] ? Zero : AllOnes;
22207 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22208 DAG.getBuildVector(VT, DL, Mask));
22209 }
22210
22211 // Failed to find a match in the chain - bail.
22212 break;
22213 }
22214
22215 // See if we can fill in the missing constant elements as zeros.
22216 // TODO: Should we do this for any constant?
22217 APInt DemandedZeroElts = APInt::getZero(NumElts);
22218 for (unsigned I = 0; I != NumElts; ++I)
22219 if (!Ops[I])
22220 DemandedZeroElts.setBit(I);
22221
22222 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22223 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22224 : DAG.getConstantFP(0, DL, MaxEltVT);
22225 for (unsigned I = 0; I != NumElts; ++I)
22226 if (!Ops[I])
22227 Ops[I] = Zero;
22228
22229 return CanonicalizeBuildVector(Ops);
22230 }
22231 }
22232
22233 return SDValue();
22234}
22235
22236SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22237 SDValue EltNo,
22238 LoadSDNode *OriginalLoad) {
22239 assert(OriginalLoad->isSimple());
22240
22241 EVT ResultVT = EVE->getValueType(0);
22242 EVT VecEltVT = InVecVT.getVectorElementType();
22243
22244 // If the vector element type is not a multiple of a byte then we are unable
22245 // to correctly compute an address to load only the extracted element as a
22246 // scalar.
22247 if (!VecEltVT.isByteSized())
22248 return SDValue();
22249
22250 ISD::LoadExtType ExtTy =
22251 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22252 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22253 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22254 return SDValue();
22255
22256 Align Alignment = OriginalLoad->getAlign();
22257 MachinePointerInfo MPI;
22258 SDLoc DL(EVE);
22259 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22260 int Elt = ConstEltNo->getZExtValue();
22261 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22262 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22263 Alignment = commonAlignment(Alignment, PtrOff);
22264 } else {
22265 // Discard the pointer info except the address space because the memory
22266 // operand can't represent this new access since the offset is variable.
22267 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22268 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22269 }
22270
22271 unsigned IsFast = 0;
22272 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22273 OriginalLoad->getAddressSpace(), Alignment,
22274 OriginalLoad->getMemOperand()->getFlags(),
22275 &IsFast) ||
22276 !IsFast)
22277 return SDValue();
22278
22279 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22280 InVecVT, EltNo);
22281
22282 // We are replacing a vector load with a scalar load. The new load must have
22283 // identical memory op ordering to the original.
22284 SDValue Load;
22285 if (ResultVT.bitsGT(VecEltVT)) {
22286 // If the result type of vextract is wider than the load, then issue an
22287 // extending load instead.
22288 ISD::LoadExtType ExtType =
22289 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22290 : ISD::EXTLOAD;
22291 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22292 NewPtr, MPI, VecEltVT, Alignment,
22293 OriginalLoad->getMemOperand()->getFlags(),
22294 OriginalLoad->getAAInfo());
22295 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22296 } else {
22297 // The result type is narrower or the same width as the vector element
22298 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22299 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22300 OriginalLoad->getAAInfo());
22301 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22302 if (ResultVT.bitsLT(VecEltVT))
22303 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22304 else
22305 Load = DAG.getBitcast(ResultVT, Load);
22306 }
22307 ++OpsNarrowed;
22308 return Load;
22309}
22310
22311/// Transform a vector binary operation into a scalar binary operation by moving
22312/// the math/logic after an extract element of a vector.
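/// For example:
///   extractelt (add X, <1,2,3,4>), 2 --> add (extractelt X, 2), 3
/// because extracting from the constant operand constant-folds.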
22313 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22314 const SDLoc &DL, bool LegalOperations) {
22315 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22316 SDValue Vec = ExtElt->getOperand(0);
22317 SDValue Index = ExtElt->getOperand(1);
22318 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22319 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22320 Vec->getNumValues() != 1)
22321 return SDValue();
22322
22323 // Targets may want to avoid this to prevent an expensive register transfer.
22324 if (!TLI.shouldScalarizeBinop(Vec))
22325 return SDValue();
22326
22327 // Extracting an element of a vector constant is constant-folded, so this
22328 // transform is just replacing a vector op with a scalar op while moving the
22329 // extract.
22330 SDValue Op0 = Vec.getOperand(0);
22331 SDValue Op1 = Vec.getOperand(1);
22332 APInt SplatVal;
22333 if (isAnyConstantBuildVector(Op0, true) ||
22334 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22335 isAnyConstantBuildVector(Op1, true) ||
22336 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22337 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22338 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22339 EVT VT = ExtElt->getValueType(0);
22340 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22341 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22342 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22343 }
22344
22345 return SDValue();
22346}
22347
22348 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22349 // recursively analyse all of its users and try to model them as
22350// bit sequence extractions. If all of them agree on the new, narrower element
22351// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22352// new element type, do so now.
22353 // This is mainly useful to recover from legalization that scalarized
22354 // the vector as wide elements; here we try to rebuild it with narrower elements.
22355//
22356// Some more nodes could be modelled if that helps cover interesting patterns.
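// For example, if an extracted i32 element is only used as (trunc to i16) and
// (trunc (srl x, 16)), both users can be rewritten as i16 extracts from a
// bitcast of the vector to twice as many i16 elements.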
22357bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22358 SDNode *N) {
22359 // We perform this optimization post type-legalization because
22360 // the type-legalizer often scalarizes integer-promoted vectors.
22361 // Performing this optimization earlier may cause legalization cycles.
22362 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22363 return false;
22364
22365 // TODO: Add support for big-endian.
22366 if (DAG.getDataLayout().isBigEndian())
22367 return false;
22368
22369 SDValue VecOp = N->getOperand(0);
22370 EVT VecVT = VecOp.getValueType();
22371 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22372
22373 // We must start with a constant extraction index.
22374 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22375 if (!IndexC)
22376 return false;
22377
22378 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22379 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
22380
22381 // TODO: deal with the case of implicit anyext of the extraction.
22382 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22383 EVT ScalarVT = N->getValueType(0);
22384 if (VecVT.getScalarType() != ScalarVT)
22385 return false;
22386
22387 // TODO: deal with the cases other than everything being integer-typed.
22388 if (!ScalarVT.isScalarInteger())
22389 return false;
22390
22391 struct Entry {
22392 SDNode *Producer;
22393
22394 // Which bits of VecOp does it contain?
22395 unsigned BitPos;
22396 int NumBits;
22397 // NOTE: the actual width of \p Producer may be wider than NumBits!
22398
22399 Entry(Entry &&) = default;
22400 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22401 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22402
22403 Entry() = delete;
22404 Entry(const Entry &) = delete;
22405 Entry &operator=(const Entry &) = delete;
22406 Entry &operator=(Entry &&) = delete;
22407 };
22408 SmallVector<Entry, 32> Worklist;
22409 SmallVector<Entry, 32> Leafs;
22410
22411 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22412 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22413 /*NumBits=*/VecEltBitWidth);
22414
22415 while (!Worklist.empty()) {
22416 Entry E = Worklist.pop_back_val();
22417 // Does the node not even use any of the VecOp bits?
22418 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22419 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22420 return false; // Let's allow the other combines clean this up first.
22421 // Did we fail to model any of the users of the Producer?
22422 bool ProducerIsLeaf = false;
22423 // Look at each user of this Producer.
22424 for (SDNode *User : E.Producer->uses()) {
22425 switch (User->getOpcode()) {
22426 // TODO: support ISD::BITCAST
22427 // TODO: support ISD::ANY_EXTEND
22428 // TODO: support ISD::ZERO_EXTEND
22429 // TODO: support ISD::SIGN_EXTEND
22430 case ISD::TRUNCATE:
22431 // Truncation simply means we keep position, but extract less bits.
22432 Worklist.emplace_back(User, E.BitPos,
22433 /*NumBits=*/User->getValueSizeInBits(0));
22434 break;
22435 // TODO: support ISD::SRA
22436 // TODO: support ISD::SHL
22437 case ISD::SRL:
22438 // We should be shifting the Producer by a constant amount.
22439 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22440 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22441 // Logical right-shift means that we start extraction later,
22442 // but stop it at the same position we did previously.
22443 unsigned ShAmt = ShAmtC->getZExtValue();
22444 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22445 break;
22446 }
22447 [[fallthrough]];
22448 default:
22449 // We can not model this user of the Producer.
22450 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22451 ProducerIsLeaf = true;
22452 // Profitability check: all users that we can not model
22453 // must be ISD::BUILD_VECTOR's.
22454 if (User->getOpcode() != ISD::BUILD_VECTOR)
22455 return false;
22456 break;
22457 }
22458 }
22459 if (ProducerIsLeaf)
22460 Leafs.emplace_back(std::move(E));
22461 }
22462
22463 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22464
22465 // If we are still at the same element granularity, give up.
22466 if (NewVecEltBitWidth == VecEltBitWidth)
22467 return false;
22468
22469 // The vector width must be a multiple of the new element width.
22470 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22471 return false;
22472
22473 // All leafs must agree on the new element width.
22474 // All leafs must not expect any "padding" bits on top of that width.
22475 // All leafs must start extraction from multiple of that width.
22476 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22477 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22478 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22479 E.BitPos % NewVecEltBitWidth == 0;
22480 }))
22481 return false;
22482
22483 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22484 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22485 VecVT.getSizeInBits() / NewVecEltBitWidth);
22486
22487 if (LegalTypes &&
22488 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22489 return false;
22490
22491 if (LegalOperations &&
22492 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22493 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22494 return false;
22495
22496 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22497 for (const Entry &E : Leafs) {
22498 SDLoc DL(E.Producer);
22499 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22500 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22501 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22502 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22503 DAG.getVectorIdxConstant(NewIndex, DL));
22504 CombineTo(E.Producer, V);
22505 }
22506
22507 return true;
22508}
22509
22510SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22511 SDValue VecOp = N->getOperand(0);
22512 SDValue Index = N->getOperand(1);
22513 EVT ScalarVT = N->getValueType(0);
22514 EVT VecVT = VecOp.getValueType();
22515 if (VecOp.isUndef())
22516 return DAG.getUNDEF(ScalarVT);
22517
22518 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22519 //
22520 // This only really matters if the index is non-constant since other combines
22521 // on the constant elements already work.
22522 SDLoc DL(N);
22523 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22524 Index == VecOp.getOperand(2)) {
22525 SDValue Elt = VecOp.getOperand(1);
22526 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22527 }
22528
22529 // (vextract (scalar_to_vector val), 0) -> val
22530 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22531 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22532 if (DAG.isKnownNeverZero(Index))
22533 return DAG.getUNDEF(ScalarVT);
22534
22535 // Check if the result type doesn't match the inserted element type.
22536 // The inserted element and extracted element may have mismatched bitwidth.
22537 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22538 SDValue InOp = VecOp.getOperand(0);
22539 if (InOp.getValueType() != ScalarVT) {
22540 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22541 if (InOp.getValueType().bitsGT(ScalarVT))
22542 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22543 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22544 }
22545 return InOp;
22546 }
22547
22548 // extract_vector_elt of out-of-bounds element -> UNDEF
22549 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22550 if (IndexC && VecVT.isFixedLengthVector() &&
22551 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22552 return DAG.getUNDEF(ScalarVT);
22553
22554 // extract_vector_elt (build_vector x, y), 1 -> y
22555 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22556 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22557 TLI.isTypeLegal(VecVT)) {
22558 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22559 VecVT.isFixedLengthVector()) &&
22560 "BUILD_VECTOR used for scalable vectors");
22561 unsigned IndexVal =
22562 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22563 SDValue Elt = VecOp.getOperand(IndexVal);
22564 EVT InEltVT = Elt.getValueType();
22565
22566 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22567 isNullConstant(Elt)) {
22568 // Sometimes build_vector's scalar input types do not match result type.
22569 if (ScalarVT == InEltVT)
22570 return Elt;
22571
22572 // TODO: It may be useful to truncate if free if the build_vector
22573 // implicitly converts.
22574 }
22575 }
22576
22577 if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
22578 return BO;
22579
22580 if (VecVT.isScalableVector())
22581 return SDValue();
22582
22583 // All the code from this point onwards assumes fixed width vectors, but it's
22584 // possible that some of the combinations could be made to work for scalable
22585 // vectors too.
22586 unsigned NumElts = VecVT.getVectorNumElements();
22587 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22588
22589 // See if the extracted element is constant, in which case fold it if it's
22590 // a legal fp immediate.
22591 if (IndexC && ScalarVT.isFloatingPoint()) {
22592 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22593 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22594 if (KnownElt.isConstant()) {
22595 APFloat CstFP =
22596 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22597 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22598 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22599 }
22600 }
22601
22602 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22603 // there are regressions on multiple targets without it. We can end up with a
22604 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22605 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22606 VecOp.hasOneUse()) {
22607 // The vector index of the LSBs of the source depends on the endianness.
22608 bool IsLE = DAG.getDataLayout().isLittleEndian();
22609 unsigned ExtractIndex = IndexC->getZExtValue();
22610 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22611 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22612 SDValue BCSrc = VecOp.getOperand(0);
22613 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22614 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22615
22616 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22617 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22618 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22619 // trunc i64 X to i32
22620 SDValue X = BCSrc.getOperand(0);
22621 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22622 "Extract element and scalar to vector can't change element type "
22623 "from FP to integer.");
22624 unsigned XBitWidth = X.getValueSizeInBits();
22625 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22626
22627 // An extract element return value type can be wider than its vector
22628 // operand element type. In that case, the high bits are undefined, so
22629 // it's possible that we may need to extend rather than truncate.
22630 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22631 assert(XBitWidth % VecEltBitWidth == 0 &&
22632 "Scalar bitwidth must be a multiple of vector element bitwidth");
22633 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22634 }
22635 }
22636 }
22637
22638 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22639 // We only perform this optimization before the op legalization phase because
22640 // we may introduce new vector instructions which are not backed by TD
22641 // patterns. For example on AVX, extracting elements from a wide vector
22642 // without using extract_subvector. However, if we can find an underlying
22643 // scalar value, then we can always use that.
22644 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22645 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22646 // Find the new index to extract from.
22647 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22648
22649 // Extracting an undef index is undef.
22650 if (OrigElt == -1)
22651 return DAG.getUNDEF(ScalarVT);
22652
22653 // Select the right vector half to extract from.
22654 SDValue SVInVec;
22655 if (OrigElt < (int)NumElts) {
22656 SVInVec = VecOp.getOperand(0);
22657 } else {
22658 SVInVec = VecOp.getOperand(1);
22659 OrigElt -= NumElts;
22660 }
22661
22662 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22663 SDValue InOp = SVInVec.getOperand(OrigElt);
22664 if (InOp.getValueType() != ScalarVT) {
22665 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22666 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22667 }
22668
22669 return InOp;
22670 }
22671
22672 // FIXME: We should handle recursing on other vector shuffles and
22673 // scalar_to_vector here as well.
22674
22675 if (!LegalOperations ||
22676 // FIXME: Should really be just isOperationLegalOrCustom.
22677 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22678 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
22679 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22680 DAG.getVectorIdxConstant(OrigElt, DL));
22681 }
22682 }
22683
22684 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22685 // simplify it based on the (valid) extraction indices.
22686 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22687 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22688 Use->getOperand(0) == VecOp &&
22689 isa<ConstantSDNode>(Use->getOperand(1));
22690 })) {
22691 APInt DemandedElts = APInt::getZero(NumElts);
22692 for (SDNode *Use : VecOp->uses()) {
22693 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22694 if (CstElt->getAPIntValue().ult(NumElts))
22695 DemandedElts.setBit(CstElt->getZExtValue());
22696 }
22697 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22698 // We simplified the vector operand of this extract element. If this
22699 // extract is not dead, visit it again so it is folded properly.
22700 if (N->getOpcode() != ISD::DELETED_NODE)
22701 AddToWorklist(N);
22702 return SDValue(N, 0);
22703 }
22704 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22705 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22706 // We simplified the vector operand of this extract element. If this
22707 // extract is not dead, visit it again so it is folded properly.
22708 if (N->getOpcode() != ISD::DELETED_NODE)
22709 AddToWorklist(N);
22710 return SDValue(N, 0);
22711 }
22712 }
22713
22714 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22715 return SDValue(N, 0);
22716
22717 // Everything under here is trying to match an extract of a loaded value.
22718 // If the result of load has to be truncated, then it's not necessarily
22719 // profitable.
22720 bool BCNumEltsChanged = false;
22721 EVT ExtVT = VecVT.getVectorElementType();
22722 EVT LVT = ExtVT;
22723 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22724 return SDValue();
22725
22726 if (VecOp.getOpcode() == ISD::BITCAST) {
22727 // Don't duplicate a load with other uses.
22728 if (!VecOp.hasOneUse())
22729 return SDValue();
22730
22731 EVT BCVT = VecOp.getOperand(0).getValueType();
22732 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22733 return SDValue();
22734 if (NumElts != BCVT.getVectorNumElements())
22735 BCNumEltsChanged = true;
22736 VecOp = VecOp.getOperand(0);
22737 ExtVT = BCVT.getVectorElementType();
22738 }
22739
22740 // extract (vector load $addr), i --> load $addr + i * size
22741 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22742 ISD::isNormalLoad(VecOp.getNode()) &&
22743 !Index->hasPredecessor(VecOp.getNode())) {
22744 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22745 if (VecLoad && VecLoad->isSimple())
22746 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22747 }
22748
22749 // Perform only after legalization to ensure build_vector / vector_shuffle
22750 // optimizations have already been done.
22751 if (!LegalOperations || !IndexC)
22752 return SDValue();
22753
22754 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22755 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22756 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22757 int Elt = IndexC->getZExtValue();
22758 LoadSDNode *LN0 = nullptr;
22759 if (ISD::isNormalLoad(VecOp.getNode())) {
22760 LN0 = cast<LoadSDNode>(VecOp);
22761 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22762 VecOp.getOperand(0).getValueType() == ExtVT &&
22763 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22764 // Don't duplicate a load with other uses.
22765 if (!VecOp.hasOneUse())
22766 return SDValue();
22767
22768 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22769 }
22770 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22771 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22772 // =>
22773 // (load $addr+1*size)
22774
22775 // Don't duplicate a load with other uses.
22776 if (!VecOp.hasOneUse())
22777 return SDValue();
22778
22779 // If the bit convert changed the number of elements, it is unsafe
22780 // to examine the mask.
22781 if (BCNumEltsChanged)
22782 return SDValue();
22783
22784 // Select the input vector, guarding against out of range extract vector.
22785 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22786 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22787
22788 if (VecOp.getOpcode() == ISD::BITCAST) {
22789 // Don't duplicate a load with other uses.
22790 if (!VecOp.hasOneUse())
22791 return SDValue();
22792
22793 VecOp = VecOp.getOperand(0);
22794 }
22795 if (ISD::isNormalLoad(VecOp.getNode())) {
22796 LN0 = cast<LoadSDNode>(VecOp);
22797 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22798 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22799 }
22800 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22801 VecVT.getVectorElementType() == ScalarVT &&
22802 (!LegalTypes ||
22803 TLI.isTypeLegal(
22804 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22805 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22806 // -> extract_vector_elt a, 0
22807 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22808 // -> extract_vector_elt a, 1
22809 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22810 // -> extract_vector_elt b, 0
22811 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22812 // -> extract_vector_elt b, 1
22813 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22814 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22815 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22816 Index.getValueType());
22817
22818 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22819 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22820 ConcatVT.getVectorElementType(),
22821 ConcatOp, NewIdx);
22822 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22823 }
22824
22825 // Make sure we found a non-volatile load and the extractelement is
22826 // the only use.
22827 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22828 return SDValue();
22829
22830 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22831 if (Elt == -1)
22832 return DAG.getUNDEF(LVT);
22833
22834 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22835}
22836
22837// Simplify (build_vec (ext )) to (bitcast (build_vec ))
22838SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22839 // We perform this optimization post type-legalization because
22840 // the type-legalizer often scalarizes integer-promoted vectors.
22841 // Performing this optimization before may create bit-casts which
22842 // will be type-legalized to complex code sequences.
22843 // We perform this optimization only before the operation legalizer because we
22844 // may introduce illegal operations.
22845 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22846 return SDValue();
22847
22848 unsigned NumInScalars = N->getNumOperands();
22849 SDLoc DL(N);
22850 EVT VT = N->getValueType(0);
22851
22852 // Check to see if this is a BUILD_VECTOR of a bunch of values
22853 // which come from any_extend or zero_extend nodes. If so, we can create
22854 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22855 // optimizations. We do not handle sign-extend because we can't fill the sign
22856 // using shuffles.
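// For example (illustrative, little-endian):
//   (v2i32 (build_vector (zext i16:a), (zext i16:b)))
//     -> (v2i32 (bitcast (v4i16 (build_vector a, 0, b, 0))))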
22857 EVT SourceType = MVT::Other;
22858 bool AllAnyExt = true;
22859
22860 for (unsigned i = 0; i != NumInScalars; ++i) {
22861 SDValue In = N->getOperand(i);
22862 // Ignore undef inputs.
22863 if (In.isUndef()) continue;
22864
22865 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22866 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22867
22868 // Abort if the element is not an extension.
22869 if (!ZeroExt && !AnyExt) {
22870 SourceType = MVT::Other;
22871 break;
22872 }
22873
22874 // The input is a ZeroExt or AnyExt. Check the original type.
22875 EVT InTy = In.getOperand(0).getValueType();
22876
22877 // Check that all of the widened source types are the same.
22878 if (SourceType == MVT::Other)
22879 // First time.
22880 SourceType = InTy;
22881 else if (InTy != SourceType) {
22882 // Multiple incoming types. Abort.
22883 SourceType = MVT::Other;
22884 break;
22885 }
22886
22887 // Check if all of the extends are ANY_EXTENDs.
22888 AllAnyExt &= AnyExt;
22889 }
22890
22891 // In order to have valid types, all of the inputs must be extended from the
22892 // same source type and all of the inputs must be any or zero extend.
22893 // Scalar sizes must be a power of two.
22894 EVT OutScalarTy = VT.getScalarType();
22895 bool ValidTypes =
22896 SourceType != MVT::Other &&
22897 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22898 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22899
22900 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22901 // turn into a single shuffle instruction.
22902 if (!ValidTypes)
22903 return SDValue();
22904
22905 // If we already have a splat buildvector, then don't fold it if it means
22906 // introducing zeros.
22907 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22908 return SDValue();
22909
22910 bool isLE = DAG.getDataLayout().isLittleEndian();
22911 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22912 assert(ElemRatio > 1 && "Invalid element size ratio");
22913 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22914 DAG.getConstant(0, DL, SourceType);
22915
22916 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22917 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22918
22919 // Populate the new build_vector
22920 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22921 SDValue Cast = N->getOperand(i);
22922 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22923 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22924 Cast.isUndef()) && "Invalid cast opcode");
22925 SDValue In;
22926 if (Cast.isUndef())
22927 In = DAG.getUNDEF(SourceType);
22928 else
22929 In = Cast->getOperand(0);
22930 unsigned Index = isLE ? (i * ElemRatio) :
22931 (i * ElemRatio + (ElemRatio - 1));
22932
22933 assert(Index < Ops.size() && "Invalid index");
22934 Ops[Index] = In;
22935 }
22936
22937 // The type of the new BUILD_VECTOR node.
22938 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22939 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22940 "Invalid vector size");
22941 // Check if the new vector type is legal.
22942 if (!isTypeLegal(VecVT) ||
22943 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22944 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22945 return SDValue();
22946
22947 // Make the new BUILD_VECTOR.
22948 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22949
22950 // The new BUILD_VECTOR node has the potential to be further optimized.
22951 AddToWorklist(BV.getNode());
22952 // Bitcast to the desired type.
22953 return DAG.getBitcast(VT, BV);
22954}
22955
22956// Simplify (build_vec (trunc $1)
22957// (trunc (srl $1 half-width))
22958// (trunc (srl $1 (2 * half-width))))
22959// to (bitcast $1)
22960SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22961 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22962
22963 EVT VT = N->getValueType(0);
22964
22965 // Don't run this before LegalizeTypes if VT is legal.
22966 // Targets may have other preferences.
22967 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22968 return SDValue();
22969
22970 // Only for little endian
22971 if (!DAG.getDataLayout().isLittleEndian())
22972 return SDValue();
22973
22974 SDLoc DL(N);
22975 EVT OutScalarTy = VT.getScalarType();
22976 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22977
22978 // Only for power of two types to be sure that bitcast works well
22979 if (!isPowerOf2_64(ScalarTypeBitsize))
22980 return SDValue();
22981
22982 unsigned NumInScalars = N->getNumOperands();
22983
22984 // Look through bitcasts
22985 auto PeekThroughBitcast = [](SDValue Op) {
22986 if (Op.getOpcode() == ISD::BITCAST)
22987 return Op.getOperand(0);
22988 return Op;
22989 };
22990
22991 // The source value where all the parts are extracted.
22992 SDValue Src;
22993 for (unsigned i = 0; i != NumInScalars; ++i) {
22994 SDValue In = PeekThroughBitcast(N->getOperand(i));
22995 // Ignore undef inputs.
22996 if (In.isUndef()) continue;
22997
22998 if (In.getOpcode() != ISD::TRUNCATE)
22999 return SDValue();
23000
23001 In = PeekThroughBitcast(In.getOperand(0));
23002
23003 if (In.getOpcode() != ISD::SRL) {
23004 // For now, only handle build_vec without shuffling; handle shifts here in
23005 // the future.
23006 if (i != 0)
23007 return SDValue();
23008
23009 Src = In;
23010 } else {
23011 // In is SRL
23012 SDValue part = PeekThroughBitcast(In.getOperand(0));
23013
23014 if (!Src) {
23015 Src = part;
23016 } else if (Src != part) {
23017 // Vector parts do not stem from the same variable
23018 return SDValue();
23019 }
23020
23021 SDValue ShiftAmtVal = In.getOperand(1);
23022 if (!isa<ConstantSDNode>(ShiftAmtVal))
23023 return SDValue();
23024
23025 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23026
23027 // The extracted value is not extracted at the right position
23028 if (ShiftAmt != i * ScalarTypeBitsize)
23029 return SDValue();
23030 }
23031 }
23032
23033 // Only cast if the size is the same
23034 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23035 return SDValue();
23036
23037 return DAG.getBitcast(VT, Src);
23038}
23039
23040SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23041 ArrayRef<int> VectorMask,
23042 SDValue VecIn1, SDValue VecIn2,
23043 unsigned LeftIdx, bool DidSplitVec) {
23044 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23045
23046 EVT VT = N->getValueType(0);
23047 EVT InVT1 = VecIn1.getValueType();
23048 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23049
23050 unsigned NumElems = VT.getVectorNumElements();
23051 unsigned ShuffleNumElems = NumElems;
23052
23053 // If we artificially split a vector in two already, then the offsets in the
23054 // operands will all be based off of VecIn1, even those in VecIn2.
23055 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23056
23057 uint64_t VTSize = VT.getFixedSizeInBits();
23058 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23059 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23060
23061 assert(InVT2Size <= InVT1Size &&
23062 "Inputs must be sorted to be in non-increasing vector size order.");
23063
23064 // We can't generate a shuffle node with mismatched input and output types.
23065 // Try to make the types match the type of the output.
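// For example (illustrative): with VT = v8i32 and two v4i32 inputs, the
// inputs are concatenated into a single v8i32 operand and the second
// input's lanes are addressed at offset 4 in the shuffle mask.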
23066 if (InVT1 != VT || InVT2 != VT) {
23067 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23068 // If the output vector length is a multiple of both input lengths,
23069 // we can concatenate them and pad the rest with undefs.
23070 unsigned NumConcats = VTSize / InVT1Size;
23071 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23072 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23073 ConcatOps[0] = VecIn1;
23074 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23075 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23076 VecIn2 = SDValue();
23077 } else if (InVT1Size == VTSize * 2) {
23078 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23079 return SDValue();
23080
23081 if (!VecIn2.getNode()) {
23082 // If we only have one input vector, and it's twice the size of the
23083 // output, split it in two.
23084 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23085 DAG.getVectorIdxConstant(NumElems, DL));
23086 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23087 // Since we now have shorter input vectors, adjust the offset of the
23088 // second vector's start.
23089 Vec2Offset = NumElems;
23090 } else {
23091 assert(InVT2Size <= InVT1Size &&
23092 "Second input is not going to be larger than the first one.");
23093
23094 // VecIn1 is wider than the output, and we have another, possibly
23095 // smaller input. Pad the smaller input with undefs, shuffle at the
23096 // input vector width, and extract the output.
23097 // The shuffle type is different than VT, so check legality again.
23098 if (LegalOperations &&
23099 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23100 return SDValue();
23101
23102 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23103 // lower it back into a BUILD_VECTOR. So if the inserted type is
23104 // illegal, don't even try.
23105 if (InVT1 != InVT2) {
23106 if (!TLI.isTypeLegal(InVT2))
23107 return SDValue();
23108 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23109 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23110 }
23111 ShuffleNumElems = NumElems * 2;
23112 }
23113 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23114 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23115 ConcatOps[0] = VecIn2;
23116 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23117 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23118 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23119 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23120 return SDValue();
23121 // If the dest vector has fewer than two elements, then using a shuffle and
23122 // extracting from larger regs will cost even more.
23123 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23124 return SDValue();
23125 assert(InVT2Size <= InVT1Size &&
23126 "Second input is not going to be larger than the first one.");
23127
23128 // VecIn1 is wider than the output, and we have another, possibly
23129 // smaller input. Pad the smaller input with undefs, shuffle at the
23130 // input vector width, and extract the output.
23131 // The shuffle type is different than VT, so check legality again.
23132 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23133 return SDValue();
23134
23135 if (InVT1 != InVT2) {
23136 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23137 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23138 }
23139 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23140 } else {
23141 // TODO: Support cases where the length mismatch isn't exactly by a
23142 // factor of 2.
23143 // TODO: Move this check upwards, so that if we have bad type
23144 // mismatches, we don't create any DAG nodes.
23145 return SDValue();
23146 }
23147 }
23148
23149 // Initialize mask to undef.
23150 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23151
23152 // Only need to run up to the number of elements actually used, not the
23153 // total number of elements in the shuffle - if we are shuffling a wider
23154 // vector, the high lanes should be set to undef.
23155 for (unsigned i = 0; i != NumElems; ++i) {
23156 if (VectorMask[i] <= 0)
23157 continue;
23158
23159 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23160 if (VectorMask[i] == (int)LeftIdx) {
23161 Mask[i] = ExtIndex;
23162 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23163 Mask[i] = Vec2Offset + ExtIndex;
23164 }
23165 }
23166
23167 // The type of the input vectors may have changed above.
23168 InVT1 = VecIn1.getValueType();
23169
23170 // If we already have a VecIn2, it should have the same type as VecIn1.
23171 // If we don't, get an undef/zero vector of the appropriate type.
23172 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23173 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23174
23175 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23176 if (ShuffleNumElems > NumElems)
23177 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23178
23179 return Shuffle;
23180}
23181
23182 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23183 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23184
23185 // First, determine where the build vector is not undef.
23186 // TODO: We could extend this to handle zero elements as well as undefs.
23187 int NumBVOps = BV->getNumOperands();
23188 int ZextElt = -1;
23189 for (int i = 0; i != NumBVOps; ++i) {
23190 SDValue Op = BV->getOperand(i);
23191 if (Op.isUndef())
23192 continue;
23193 if (ZextElt == -1)
23194 ZextElt = i;
23195 else
23196 return SDValue();
23197 }
23198 // Bail out if there's no non-undef element.
23199 if (ZextElt == -1)
23200 return SDValue();
23201
23202 // The build vector contains some number of undef elements and exactly
23203 // one other element. That other element must be a zero-extended scalar
23204 // extracted from a vector at a constant index to turn this into a shuffle.
23205 // Also, require that the build vector does not implicitly truncate/extend
23206 // its elements.
23207 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
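// For example (illustrative): in
//   (v4i32 (build_vector undef, (zext (extractelt v8i16:V, C) to i32), undef, undef))
// the single non-undef element, extracted at constant index C, can be rebuilt
// as a bitcast of a shuffle of V with a zero vector.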
23208 EVT VT = BV->getValueType(0);
23209 SDValue Zext = BV->getOperand(ZextElt);
23210 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23211 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23212 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23213 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23214 return SDValue();
23215
23216 // The zero-extend must be a multiple of the source size, and we must be
23217 // building a vector of the same size as the source of the extract element.
23218 SDValue Extract = Zext.getOperand(0);
23219 unsigned DestSize = Zext.getValueSizeInBits();
23220 unsigned SrcSize = Extract.getValueSizeInBits();
23221 if (DestSize % SrcSize != 0 ||
23222 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23223 return SDValue();
23224
23225 // Create a shuffle mask that will combine the extracted element with zeros
23226 // and undefs.
23227 int ZextRatio = DestSize / SrcSize;
23228 int NumMaskElts = NumBVOps * ZextRatio;
23229 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23230 for (int i = 0; i != NumMaskElts; ++i) {
23231 if (i / ZextRatio == ZextElt) {
23232 // The low bits of the (potentially translated) extracted element map to
23233 // the source vector. The high bits map to zero. We will use a zero vector
23234 // as the 2nd source operand of the shuffle, so use the 1st element of
23235 // that vector (mask value is number-of-elements) for the high bits.
23236 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23237 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23238 : NumMaskElts;
23239 }
23240
23241 // Undef elements of the build vector remain undef because we initialize
23242 // the shuffle mask with -1.
23243 }
23244
23245 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23246 // bitcast (shuffle V, ZeroVec, VectorMask)
23247 SDLoc DL(BV);
23248 EVT VecVT = Extract.getOperand(0).getValueType();
23249 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23250 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23251 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23252 ZeroVec, ShufMask, DAG);
23253 if (!Shuf)
23254 return SDValue();
23255 return DAG.getBitcast(VT, Shuf);
23256}
23257
23258// FIXME: promote to STLExtras.
23259template <typename R, typename T>
23260static auto getFirstIndexOf(R &&Range, const T &Val) {
23261 auto I = find(Range, Val);
23262 if (I == Range.end())
23263 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23264 return std::distance(Range.begin(), I);
23265}
23266
23267// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23268// operations. If the types of the vectors we're extracting from allow it,
23269// turn this into a vector_shuffle node.
23270SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23271 SDLoc DL(N);
23272 EVT VT = N->getValueType(0);
23273
23274 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23275 if (!isTypeLegal(VT))
23276 return SDValue();
23277
23278 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23279 return V;
23280
23281 // May only combine to shuffle after legalize if shuffle is legal.
23282 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23283 return SDValue();
23284
23285 bool UsesZeroVector = false;
23286 unsigned NumElems = N->getNumOperands();
23287
23288 // Record, for each element of the newly built vector, which input vector
23289 // that element comes from. -1 stands for undef, 0 for the zero vector,
23290 // and positive values for the input vectors.
23291 // VectorMask maps each element to its vector number, and VecIn maps vector
23292 // numbers to their initial SDValues.
23293
23294 SmallVector<int, 8> VectorMask(NumElems, -1);
23295 SmallVector<SDValue, 8> VecIn;
23296 VecIn.push_back(SDValue());
23297
23298 for (unsigned i = 0; i != NumElems; ++i) {
23299 SDValue Op = N->getOperand(i);
23300
23301 if (Op.isUndef())
23302 continue;
23303
23304 // See if we can use a blend with a zero vector.
23305 // TODO: Should we generalize this to a blend with an arbitrary constant
23306 // vector?
23307 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23308 UsesZeroVector = true;
23309 VectorMask[i] = 0;
23310 continue;
23311 }
23312
23313 // Not an undef or zero. If the input is something other than an
23314 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23315 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23316 !isa<ConstantSDNode>(Op.getOperand(1)))
23317 return SDValue();
23318 SDValue ExtractedFromVec = Op.getOperand(0);
23319
23320 if (ExtractedFromVec.getValueType().isScalableVector())
23321 return SDValue();
23322
23323 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23324 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23325 return SDValue();
23326
23327 // All inputs must have the same element type as the output.
23328 if (VT.getVectorElementType() !=
23329 ExtractedFromVec.getValueType().getVectorElementType())
23330 return SDValue();
23331
23332 // Have we seen this input vector before?
23333 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23334 // a map back from SDValues to numbers isn't worth it.
23335 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23336 if (Idx == -1) { // A new source vector?
23337 Idx = VecIn.size();
23338 VecIn.push_back(ExtractedFromVec);
23339 }
23340
23341 VectorMask[i] = Idx;
23342 }
23343
23344 // If we didn't find at least one input vector, bail out.
23345 if (VecIn.size() < 2)
23346 return SDValue();
23347
23348 // If all the Operands of BUILD_VECTOR extract from same
23349 // vector, then split the vector efficiently based on the maximum
23350 // vector access index and adjust the VectorMask and
23351 // VecIn accordingly.
23352 bool DidSplitVec = false;
23353 if (VecIn.size() == 2) {
23354 unsigned MaxIndex = 0;
23355 unsigned NearestPow2 = 0;
23356 SDValue Vec = VecIn.back();
23357 EVT InVT = Vec.getValueType();
23358 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23359
23360 for (unsigned i = 0; i < NumElems; i++) {
23361 if (VectorMask[i] <= 0)
23362 continue;
23363 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23364 IndexVec[i] = Index;
23365 MaxIndex = std::max(MaxIndex, Index);
23366 }
23367
23368 NearestPow2 = PowerOf2Ceil(MaxIndex);
23369 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23370 NumElems * 2 < NearestPow2) {
23371 unsigned SplitSize = NearestPow2 / 2;
23372 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23373 InVT.getVectorElementType(), SplitSize);
23374 if (TLI.isTypeLegal(SplitVT) &&
23375 SplitSize + SplitVT.getVectorNumElements() <=
23376 InVT.getVectorNumElements()) {
23377 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23378 DAG.getVectorIdxConstant(SplitSize, DL));
23379 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23380 DAG.getVectorIdxConstant(0, DL));
23381 VecIn.pop_back();
23382 VecIn.push_back(VecIn1);
23383 VecIn.push_back(VecIn2);
23384 DidSplitVec = true;
23385
23386 for (unsigned i = 0; i < NumElems; i++) {
23387 if (VectorMask[i] <= 0)
23388 continue;
23389 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23390 }
23391 }
23392 }
23393 }
23394
23395 // Sort input vectors by decreasing vector element count,
23396 // while preserving the relative order of equally-sized vectors.
23397 // Note that we keep the first "implicit" zero vector as-is.
23398 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23399 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23400 [](const SDValue &a, const SDValue &b) {
23401 return a.getValueType().getVectorNumElements() >
23402 b.getValueType().getVectorNumElements();
23403 });
23404
23405 // We now also need to rebuild the VectorMask, because it referenced element
23406 // order in VecIn, and we just sorted them.
23407 for (int &SourceVectorIndex : VectorMask) {
23408 if (SourceVectorIndex <= 0)
23409 continue;
23410 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23411 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23412 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23413 SourceVectorIndex = Idx;
23414 }
23415
23416 VecIn = std::move(SortedVecIn);
23417
23418 // TODO: Should this fire if some of the input vectors have an illegal type (like
23419 // it does now), or should we let legalization run its course first?
23420
23421 // Shuffle phase:
23422 // Take pairs of vectors, and shuffle them so that the result has elements
23423 // from these vectors in the correct places.
23424 // For example, given:
23425 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23426 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23427 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23428 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23429 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23430 // We will generate:
23431 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23432 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23433 SmallVector<SDValue, 4> Shuffles;
23434 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23435 unsigned LeftIdx = 2 * In + 1;
23436 SDValue VecLeft = VecIn[LeftIdx];
23437 SDValue VecRight =
23438 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23439
23440 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23441 VecRight, LeftIdx, DidSplitVec))
23442 Shuffles.push_back(Shuffle);
23443 else
23444 return SDValue();
23445 }
23446
23447 // If we need the zero vector as an "ingredient" in the blend tree, add it
23448 // to the list of shuffles.
23449 if (UsesZeroVector)
23450 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23451 : DAG.getConstantFP(0.0, DL, VT));
23452
23453 // If we only have one shuffle, we're done.
23454 if (Shuffles.size() == 1)
23455 return Shuffles[0];
23456
23457 // Update the vector mask to point to the post-shuffle vectors.
23458 for (int &Vec : VectorMask)
23459 if (Vec == 0)
23460 Vec = Shuffles.size() - 1;
23461 else
23462 Vec = (Vec - 1) / 2;
23463
23464 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23465 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23466 // generate:
23467 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23468 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23469 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23470 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23471 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23472 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23473 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23474
23475 // Make sure the initial size of the shuffle list is even.
23476 if (Shuffles.size() % 2)
23477 Shuffles.push_back(DAG.getUNDEF(VT));
23478
23479 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23480 if (CurSize % 2) {
23481 Shuffles[CurSize] = DAG.getUNDEF(VT);
23482 CurSize++;
23483 }
23484 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23485 int Left = 2 * In;
23486 int Right = 2 * In + 1;
23487 SmallVector<int, 8> Mask(NumElems, -1);
23488 SDValue L = Shuffles[Left];
23489 ArrayRef<int> LMask;
23490 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23491 L.use_empty() && L.getOperand(1).isUndef() &&
23492 L.getOperand(0).getValueType() == L.getValueType();
23493 if (IsLeftShuffle) {
23494 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23495 L = L.getOperand(0);
23496 }
23497 SDValue R = Shuffles[Right];
23498 ArrayRef<int> RMask;
23499 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23500 R.use_empty() && R.getOperand(1).isUndef() &&
23501 R.getOperand(0).getValueType() == R.getValueType();
23502 if (IsRightShuffle) {
23503 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23504 R = R.getOperand(0);
23505 }
23506 for (unsigned I = 0; I != NumElems; ++I) {
23507 if (VectorMask[I] == Left) {
23508 Mask[I] = I;
23509 if (IsLeftShuffle)
23510 Mask[I] = LMask[I];
23511 VectorMask[I] = In;
23512 } else if (VectorMask[I] == Right) {
23513 Mask[I] = I + NumElems;
23514 if (IsRightShuffle)
23515 Mask[I] = RMask[I] + NumElems;
23516 VectorMask[I] = In;
23517 }
23518 }
23519
23520 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23521 }
23522 }
23523 return Shuffles[0];
23524}
23525
23526 // Try to turn a build vector of zero extends of extract vector elts into a
23527 // vector zero extend and possibly an extract subvector.
23528// TODO: Support sign extend?
23529// TODO: Allow undef elements?
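// For example (illustrative):
//   (v4i32 (build_vector (zext (extractelt v8i16:X, 4)),
//                        (zext (extractelt X, 5)),
//                        (zext (extractelt X, 6)),
//                        (zext (extractelt X, 7))))
//     -> (v4i32 (zero_extend (v4i16 (extract_subvector X, 4))))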
23530SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23531 if (LegalOperations)
23532 return SDValue();
23533
23534 EVT VT = N->getValueType(0);
23535
23536 bool FoundZeroExtend = false;
23537 SDValue Op0 = N->getOperand(0);
23538 auto checkElem = [&](SDValue Op) -> int64_t {
23539 unsigned Opc = Op.getOpcode();
23540 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23541 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23542 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23543 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23544 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23545 return C->getZExtValue();
23546 return -1;
23547 };
23548
23549 // Make sure the first element matches
23550 // (zext (extract_vector_elt X, C))
23551 // Offset must be a constant multiple of the
23552 // known-minimum vector length of the result type.
23553 int64_t Offset = checkElem(Op0);
23554 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23555 return SDValue();
23556
23557 unsigned NumElems = N->getNumOperands();
23558 SDValue In = Op0.getOperand(0).getOperand(0);
23559 EVT InSVT = In.getValueType().getScalarType();
23560 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23561
23562 // Don't create an illegal input type after type legalization.
23563 if (LegalTypes && !TLI.isTypeLegal(InVT))
23564 return SDValue();
23565
23566 // Ensure all the elements come from the same vector and are adjacent.
23567 for (unsigned i = 1; i != NumElems; ++i) {
23568 if ((Offset + i) != checkElem(N->getOperand(i)))
23569 return SDValue();
23570 }
23571
23572 SDLoc DL(N);
23573 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23574 Op0.getOperand(0).getOperand(1));
23575 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23576 VT, In);
23577}
23578
23579// If this is a very simple BUILD_VECTOR with first element being a ZERO_EXTEND,
23580 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
23581 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23582 // This pattern can appear during legalization.
23583//
23584// NOTE: This can be generalized to allow more than a single
23585 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
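// For example (illustrative, little-endian):
//   (v2i64 (build_vector (i64 (zero_extend i32:X)), (i64 0)))
//     -> (v2i64 (bitcast (v4i32 (build_vector X, 0, 0, 0))))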
23586SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23587 // Don't run this after legalization. Targets may have other preferences.
23588 if (Level >= AfterLegalizeDAG)
23589 return SDValue();
23590
23591 // FIXME: support big-endian.
23592 if (DAG.getDataLayout().isBigEndian())
23593 return SDValue();
23594
23595 EVT VT = N->getValueType(0);
23596 EVT OpVT = N->getOperand(0).getValueType();
23597 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23598
23599 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23600
23601 if (!TLI.isTypeLegal(OpIntVT) ||
23602 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23603 return SDValue();
23604
23605 unsigned EltBitwidth = VT.getScalarSizeInBits();
23606 // NOTE: the actual width of operands may be wider than that!
23607
23608 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23609 // active bits they all have? We'll want to truncate them all to that width.
23610 unsigned ActiveBits = 0;
23611 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23612 for (auto I : enumerate(N->ops())) {
23613 SDValue Op = I.value();
23614 // FIXME: support UNDEF elements?
23615 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23616 unsigned OpActiveBits =
23617 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23618 if (OpActiveBits == 0) {
23619 KnownZeroOps.setBit(I.index());
23620 continue;
23621 }
23622 // Profitability check: don't allow non-zero constant operands.
23623 return SDValue();
23624 }
23625 // Profitability check: there must only be a single non-zero operand,
23626 // and it must be the first operand of the BUILD_VECTOR.
23627 if (I.index() != 0)
23628 return SDValue();
23629 // The operand must be a zero-extension itself.
23630 // FIXME: this could be generalized to known leading zeros check.
23631 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23632 return SDValue();
23633 unsigned CurrActiveBits =
23634 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23635 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23636 ActiveBits = CurrActiveBits;
23637 // We want to at least halve the element size.
23638 if (2 * ActiveBits > EltBitwidth)
23639 return SDValue();
23640 }
23641
23642 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23643 if (ActiveBits == 0)
23644 return SDValue();
23645
23646 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
23647 // into how many chunks can we split our element width?
23648 EVT NewScalarIntVT, NewIntVT;
23649 std::optional<unsigned> Factor;
23650 // We can split the element into at least two chunks, but not into more
23651 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23652 // for which the element width is a multiple of it,
23653 // and the resulting types/operations on that chunk width are legal.
23654 assert(2 * ActiveBits <= EltBitwidth &&
23655 "We know that half or less bits of the element are active.");
23656 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23657 if (EltBitwidth % Scale != 0)
23658 continue;
23659 unsigned ChunkBitwidth = EltBitwidth / Scale;
23660 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23661 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23662 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23663 Scale * N->getNumOperands());
23664 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23665 (LegalOperations &&
23666 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23667 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23668 continue;
23669 Factor = Scale;
23670 break;
23671 }
23672 if (!Factor)
23673 return SDValue();
23674
23675 SDLoc DL(N);
23676 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23677
23678 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23679 SmallVector<SDValue, 16> NewOps;
23680 NewOps.reserve(NewIntVT.getVectorNumElements());
23681 for (auto I : enumerate(N->ops())) {
23682 SDValue Op = I.value();
23683 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23684 unsigned SrcOpIdx = I.index();
23685 if (KnownZeroOps[SrcOpIdx]) {
23686 NewOps.append(*Factor, ZeroOp);
23687 continue;
23688 }
23689 Op = DAG.getBitcast(OpIntVT, Op);
23690 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23691 NewOps.emplace_back(Op);
23692 NewOps.append(*Factor - 1, ZeroOp);
23693 }
23694 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23695 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23696 NewBV = DAG.getBitcast(VT, NewBV);
23697 return NewBV;
23698}
23699
23700SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23701 EVT VT = N->getValueType(0);
23702
23703 // A vector built entirely of undefs is undef.
23704 if (ISD::allOperandsUndef(N))
23705 return DAG.getUNDEF(VT);
23706
23707 // If this is a splat of a bitcast from another vector, change to a
23708 // concat_vector.
23709 // For example:
23710 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23711 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23712 //
23713 // If X is a build_vector itself, the concat can become a larger build_vector.
23714 // TODO: Maybe this is useful for non-splat too?
23715 if (!LegalOperations) {
23716 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23717 // Only change build_vector to a concat_vector if the splat value type is
23718 // same as the vector element type.
23719 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23720 Splat = peekThroughBitcasts(Splat);
23721 EVT SrcVT = Splat.getValueType();
23722 if (SrcVT.isVector()) {
23723 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23724 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23725 SrcVT.getVectorElementType(), NumElts);
23726 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23727 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23728 SDValue Concat =
23729 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23730 return DAG.getBitcast(VT, Concat);
23731 }
23732 }
23733 }
23734 }
23735
23736 // Check if we can express BUILD VECTOR via subvector extract.
23737 if (!LegalTypes && (N->getNumOperands() > 1)) {
23738 SDValue Op0 = N->getOperand(0);
23739 auto checkElem = [&](SDValue Op) -> uint64_t {
23740 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23741 (Op0.getOperand(0) == Op.getOperand(0)))
23742 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23743 return CNode->getZExtValue();
23744 return -1;
23745 };
23746
23747 int Offset = checkElem(Op0);
23748 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23749 if (Offset + i != checkElem(N->getOperand(i))) {
23750 Offset = -1;
23751 break;
23752 }
23753 }
23754
23755 if ((Offset == 0) &&
23756 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23757 return Op0.getOperand(0);
23758 if ((Offset != -1) &&
23759 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23760 0)) // IDX must be multiple of output size.
23761 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23762 Op0.getOperand(0), Op0.getOperand(1));
23763 }
23764
23765 if (SDValue V = convertBuildVecZextToZext(N))
23766 return V;
23767
23768 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23769 return V;
23770
23771 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23772 return V;
23773
23774 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23775 return V;
23776
23777 if (SDValue V = reduceBuildVecToShuffle(N))
23778 return V;
23779
23780 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23781 // Do this late as some of the above may replace the splat.
23782 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23783 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23784 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23785 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23786 }
23787
23788 return SDValue();
23789}
23790
23791 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23792 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23793 EVT OpVT = N->getOperand(0).getValueType();
23794
23795 // If the operands are legal vectors, leave them alone.
23796 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23797 return SDValue();
23798
23799 SDLoc DL(N);
23800 EVT VT = N->getValueType(0);
23801 SmallVector<SDValue, 8> Ops;
23802 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23803
23804 // Keep track of what we encounter.
23805 bool AnyInteger = false;
23806 bool AnyFP = false;
23807 for (const SDValue &Op : N->ops()) {
23808 if (ISD::BITCAST == Op.getOpcode() &&
23809 !Op.getOperand(0).getValueType().isVector())
23810 Ops.push_back(Op.getOperand(0));
23811 else if (ISD::UNDEF == Op.getOpcode())
23812 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23813 else
23814 return SDValue();
23815
23816 // Note whether we encounter an integer or floating point scalar.
23817 // If it's neither, bail out, it could be something weird like x86mmx.
23818 EVT LastOpVT = Ops.back().getValueType();
23819 if (LastOpVT.isFloatingPoint())
23820 AnyFP = true;
23821 else if (LastOpVT.isInteger())
23822 AnyInteger = true;
23823 else
23824 return SDValue();
23825 }
23826
23827 // If any of the operands is a floating point scalar bitcast to a vector,
23828 // use floating point types throughout, and bitcast everything.
23829 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
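// For example (illustrative, assuming v1i64 is not a legal type on the
// target): concat_vectors (v1i64 (bitcast f64:X)), (v1i64 undef) becomes
// (v2i64 (bitcast (v2f64 (build_vector X, undef)))).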
23830 if (AnyFP) {
23831 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23832 if (AnyInteger) {
23833 for (SDValue &Op : Ops) {
23834 if (Op.getValueType() == SVT)
23835 continue;
23836 if (Op.isUndef())
23837 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23838 else
23839 Op = DAG.getBitcast(SVT, Op);
23840 }
23841 }
23842 }
23843
23844 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23845 VT.getSizeInBits() / SVT.getSizeInBits());
23846 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23847}
23848
23849// Attempt to merge nested concat_vectors/undefs.
23850// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23851// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23852 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23853 SelectionDAG &DAG) {
23854 EVT VT = N->getValueType(0);
23855
23856 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23857 EVT SubVT;
23858 SDValue FirstConcat;
23859 for (const SDValue &Op : N->ops()) {
23860 if (Op.isUndef())
23861 continue;
23862 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23863 return SDValue();
23864 if (!FirstConcat) {
23865 SubVT = Op.getOperand(0).getValueType();
23866 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23867 return SDValue();
23868 FirstConcat = Op;
23869 continue;
23870 }
23871 if (SubVT != Op.getOperand(0).getValueType())
23872 return SDValue();
23873 }
23874 assert(FirstConcat && "Concat of all-undefs found");
23875
23876 SmallVector<SDValue> ConcatOps;
23877 for (const SDValue &Op : N->ops()) {
23878 if (Op.isUndef()) {
23879 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23880 continue;
23881 }
23882 ConcatOps.append(Op->op_begin(), Op->op_end());
23883 }
23884 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23885}
23886
23887// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23888// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23889// most two distinct vectors the same size as the result, attempt to turn this
23890// into a legal shuffle.
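// For example (illustrative): with v8i32 inputs X and Y,
//   concat_vectors (extract_subvector X, 0), (extract_subvector Y, 4)
//     -> vector_shuffle<0,1,2,3,12,13,14,15> X, Y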
23891 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23892 EVT VT = N->getValueType(0);
23893 EVT OpVT = N->getOperand(0).getValueType();
23894
23895 // We currently can't generate an appropriate shuffle for a scalable vector.
23896 if (VT.isScalableVector())
23897 return SDValue();
23898
23899 int NumElts = VT.getVectorNumElements();
23900 int NumOpElts = OpVT.getVectorNumElements();
23901
23902 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23903 SmallVector<int, 8> Mask;
23904
23905 for (SDValue Op : N->ops()) {
23906 Op = peekThroughBitcasts(Op);
23907
23908 // UNDEF nodes convert to UNDEF shuffle mask values.
23909 if (Op.isUndef()) {
23910 Mask.append((unsigned)NumOpElts, -1);
23911 continue;
23912 }
23913
23914 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23915 return SDValue();
23916
23917 // What vector are we extracting the subvector from and at what index?
23918 SDValue ExtVec = Op.getOperand(0);
23919 int ExtIdx = Op.getConstantOperandVal(1);
23920
23921 // We want the EVT of the original extraction to correctly scale the
23922 // extraction index.
23923 EVT ExtVT = ExtVec.getValueType();
23924 ExtVec = peekThroughBitcasts(ExtVec);
23925
23926 // UNDEF nodes convert to UNDEF shuffle mask values.
23927 if (ExtVec.isUndef()) {
23928 Mask.append((unsigned)NumOpElts, -1);
23929 continue;
23930 }
23931
23932 // Ensure that we are extracting a subvector from a vector the same
23933 // size as the result.
23934 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23935 return SDValue();
23936
23937 // Scale the subvector index to account for any bitcast.
23938 int NumExtElts = ExtVT.getVectorNumElements();
23939 if (0 == (NumExtElts % NumElts))
23940 ExtIdx /= (NumExtElts / NumElts);
23941 else if (0 == (NumElts % NumExtElts))
23942 ExtIdx *= (NumElts / NumExtElts);
23943 else
23944 return SDValue();
23945
23946 // At most we can reference 2 inputs in the final shuffle.
23947 if (SV0.isUndef() || SV0 == ExtVec) {
23948 SV0 = ExtVec;
23949 for (int i = 0; i != NumOpElts; ++i)
23950 Mask.push_back(i + ExtIdx);
23951 } else if (SV1.isUndef() || SV1 == ExtVec) {
23952 SV1 = ExtVec;
23953 for (int i = 0; i != NumOpElts; ++i)
23954 Mask.push_back(i + ExtIdx + NumElts);
23955 } else {
23956 return SDValue();
23957 }
23958 }
23959
23960 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23961 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23962 DAG.getBitcast(VT, SV1), Mask, DAG);
23963}
23964
23965static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23966 unsigned CastOpcode = N->getOperand(0).getOpcode();
23967 switch (CastOpcode) {
23968 case ISD::SINT_TO_FP:
23969 case ISD::UINT_TO_FP:
23970 case ISD::FP_TO_SINT:
23971 case ISD::FP_TO_UINT:
23972 // TODO: Allow more opcodes?
23973 // case ISD::BITCAST:
23974 // case ISD::TRUNCATE:
23975 // case ISD::ZERO_EXTEND:
23976 // case ISD::SIGN_EXTEND:
23977 // case ISD::FP_EXTEND:
23978 break;
23979 default:
23980 return SDValue();
23981 }
23982
23983 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23984 if (!SrcVT.isVector())
23985 return SDValue();
23986
23987 // All operands of the concat must be the same kind of cast from the same
23988 // source type.
23989 SmallVector<SDValue, 4> SrcOps;
23990 for (SDValue Op : N->ops()) {
23991 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23992 Op.getOperand(0).getValueType() != SrcVT)
23993 return SDValue();
23994 SrcOps.push_back(Op.getOperand(0));
23995 }
23996
23997 // The wider cast must be supported by the target. This is unusual because
23998 // the type used to check operation support depends on the opcode. In addition,
23999 // check the other type in the cast to make sure this is really legal.
24000 EVT VT = N->getValueType(0);
24001 EVT SrcEltVT = SrcVT.getVectorElementType();
24002 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24003 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24004 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24005 switch (CastOpcode) {
24006 case ISD::SINT_TO_FP:
24007 case ISD::UINT_TO_FP:
24008 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24009 !TLI.isTypeLegal(VT))
24010 return SDValue();
24011 break;
24012 case ISD::FP_TO_SINT:
24013 case ISD::FP_TO_UINT:
24014 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24015 !TLI.isTypeLegal(ConcatSrcVT))
24016 return SDValue();
24017 break;
24018 default:
24019 llvm_unreachable("Unexpected cast opcode");
24020 }
24021
24022 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
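 // Illustrative example (a sketch, assuming the target supports the wider
 // v8i32 -> v8f32 conversion and v8f32 is a legal type; A, B : v4i32):
 //   concat_vectors (v4f32 sint_to_fp A), (v4f32 sint_to_fp B)
 //     --> v8f32 sint_to_fp (v8i32 concat_vectors A, B)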
24023 SDLoc DL(N);
24024 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24025 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24026}
24027
24028// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24029// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24030// to that SHUFFLE_VECTOR, create a wider SHUFFLE_VECTOR.
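// Illustrative example (a sketch; X : v4i32, unary shuffle, and all the
// legality checks below are assumed to pass):
//   concat_vectors (vector_shuffle<1,0,3,2> X, undef), X
//     --> vector_shuffle<1,0,3,2,0,1,2,3> (concat_vectors X, undef), undef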
24031static SDValue combineConcatVectorOfShuffleAndItsOperands(
24032 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24033 bool LegalOperations) {
24034 EVT VT = N->getValueType(0);
24035 EVT OpVT = N->getOperand(0).getValueType();
24036 if (VT.isScalableVector())
24037 return SDValue();
24038
24039 // For now, only allow simple 2-operand concatenations.
24040 if (N->getNumOperands() != 2)
24041 return SDValue();
24042
24043 // Don't create illegal types/shuffles when not allowed to.
24044 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24045 (LegalOperations &&
24046 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24047 return SDValue();
24048
24049 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24050 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24051 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24052 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24053 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24054 ShuffleVectorSDNode *SVN = nullptr;
24055 for (SDValue Op : N->ops()) {
24056 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24057 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24058 all_of(N->ops(), [CurSVN](SDValue Op) {
24059 // FIXME: can we allow UNDEF operands?
24060 return !Op.isUndef() &&
24061 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24062 })) {
24063 SVN = CurSVN;
24064 break;
24065 }
24066 }
24067 if (!SVN)
24068 return SDValue();
24069
24070 // We are going to pad the shuffle operands, so any index that was picking
24071 // from the second operand must be adjusted.
24072 SmallVector<int, 16> AdjustedMask;
24073 AdjustedMask.reserve(SVN->getMask().size());
24074 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24075 append_range(AdjustedMask, SVN->getMask());
24076
24077 // Identity masks for the operands of the (padded) shuffle.
24078 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24079 MutableArrayRef<int> FirstShufOpIdentityMask =
24080 MutableArrayRef<int>(IdentityMask)
24081 .take_front(OpVT.getVectorNumElements());
24082 MutableArrayRef<int> SecondShufOpIdentityMask =
24083 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24084 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24085 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24086 VT.getVectorNumElements());
24087
24088 // New combined shuffle mask.
24089 SmallVector<int, 32> Mask;
24090 Mask.reserve(VT.getVectorNumElements());
24091 for (SDValue Op : N->ops()) {
24092 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24093 if (Op.getNode() == SVN) {
24094 append_range(Mask, AdjustedMask);
24095 continue;
24096 }
24097 if (Op == SVN->getOperand(0)) {
24098 append_range(Mask, FirstShufOpIdentityMask);
24099 continue;
24100 }
24101 if (Op == SVN->getOperand(1)) {
24102 append_range(Mask, SecondShufOpIdentityMask);
24103 continue;
24104 }
24105 llvm_unreachable("Unexpected operand!");
24106 }
24107
24108 // Don't create illegal shuffle masks.
24109 if (!TLI.isShuffleMaskLegal(Mask, VT))
24110 return SDValue();
24111
24112 // Pad the shuffle operands with UNDEF.
24113 SDLoc dl(N);
24114 std::array<SDValue, 2> ShufOps;
24115 for (auto I : zip(SVN->ops(), ShufOps)) {
24116 SDValue ShufOp = std::get<0>(I);
24117 SDValue &NewShufOp = std::get<1>(I);
24118 if (ShufOp.isUndef())
24119 NewShufOp = DAG.getUNDEF(VT);
24120 else {
24121 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24122 DAG.getUNDEF(OpVT));
24123 ShufOpParts[0] = ShufOp;
24124 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24125 }
24126 }
24127 // Finally, create the new wide shuffle.
24128 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24129}
24130
24131SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24132 // If we only have one input vector, we don't need to do any concatenation.
24133 if (N->getNumOperands() == 1)
24134 return N->getOperand(0);
24135
24136 // Check if all of the operands are undefs.
24137 EVT VT = N->getValueType(0);
24138 if (ISD::allOperandsUndef(N))
24139 return DAG.getUNDEF(VT);
24140
24141 // Optimize concat_vectors where all but the first of the vectors are undef.
24142 if (all_of(drop_begin(N->ops()),
24143 [](const SDValue &Op) { return Op.isUndef(); })) {
24144 SDValue In = N->getOperand(0);
24145 assert(In.getValueType().isVector() && "Must concat vectors");
24146
24147 // If the input is a concat_vectors, just make a larger concat by padding
24148 // with smaller undefs.
24149 //
24150 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24151 // here could cause an infinite loop. That legalizing happens when LegalDAG
24152 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24153 // scalable.
24154 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24155 !(LegalDAG && In.getValueType().isScalableVector())) {
24156 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24157 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
24158 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24159 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24160 }
24161
24162 SDValue Scalar = peekThroughOneUseBitcasts(In);
24163
24164 // concat_vectors(scalar_to_vector(scalar), undef) ->
24165 // scalar_to_vector(scalar)
24166 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24167 Scalar.hasOneUse()) {
24168 EVT SVT = Scalar.getValueType().getVectorElementType();
24169 if (SVT == Scalar.getOperand(0).getValueType())
24170 Scalar = Scalar.getOperand(0);
24171 }
24172
24173 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24174 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24175 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24176 // look through the trunc so we can still do the transform:
24177 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24178 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24179 !TLI.isTypeLegal(Scalar.getValueType()) &&
24180 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24181 Scalar = Scalar->getOperand(0);
24182
24183 EVT SclTy = Scalar.getValueType();
24184
24185 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24186 return SDValue();
24187
24188 // Bail out if the vector size is not a multiple of the scalar size.
24189 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24190 return SDValue();
24191
24192 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24193 if (VNTNumElms < 2)
24194 return SDValue();
24195
24196 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24197 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24198 return SDValue();
24199
24200 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24201 return DAG.getBitcast(VT, Res);
24202 }
24203 }
24204
24205 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24206 // We have already tested above for an UNDEF only concatenation.
24207 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24208 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24209 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24210 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24211 };
24212 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24213 SmallVector<SDValue, 8> Opnds;
24214 EVT SVT = VT.getScalarType();
24215
24216 EVT MinVT = SVT;
24217 if (!SVT.isFloatingPoint()) {
24218 // If the BUILD_VECTORs are built from integers, they may have different
24219 // operand types. Get the smallest type and truncate all operands to it.
24220 bool FoundMinVT = false;
24221 for (const SDValue &Op : N->ops())
24222 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24223 EVT OpSVT = Op.getOperand(0).getValueType();
24224 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24225 FoundMinVT = true;
24226 }
24227 assert(FoundMinVT && "Concat vector type mismatch");
24228 }
24229
24230 for (const SDValue &Op : N->ops()) {
24231 EVT OpVT = Op.getValueType();
24232 unsigned NumElts = OpVT.getVectorNumElements();
24233
24234 if (ISD::UNDEF == Op.getOpcode())
24235 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24236
24237 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24238 if (SVT.isFloatingPoint()) {
24239 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24240 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24241 } else {
24242 for (unsigned i = 0; i != NumElts; ++i)
24243 Opnds.push_back(
24244 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24245 }
24246 }
24247 }
24248
24249 assert(VT.getVectorNumElements() == Opnds.size() &&
24250 "Concat vector type mismatch");
24251 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24252 }
24253
24254 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24255 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24256 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24257 return V;
24258
24259 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24260 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24261 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24262 return V;
24263
24264 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24265 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24266 return V;
24267 }
24268
24269 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24270 return V;
24271
24272 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24273 N, DAG, TLI, LegalTypes, LegalOperations))
24274 return V;
24275
24276 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24277 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24278 // operands and look for CONCAT operations that place the incoming vectors
24279 // at the exact same location.
24280 //
24281 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
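 // Illustrative example (a sketch; X : v8i32 and the concat result is also
 // v8i32, so each operand is an identity extract):
 //   concat_vectors (v4i32 extract_subvector X, 0), (v4i32 extract_subvector X, 4)
 //     --> X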
24282 SDValue SingleSource = SDValue();
24283 unsigned PartNumElem =
24284 N->getOperand(0).getValueType().getVectorMinNumElements();
24285
24286 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24287 SDValue Op = N->getOperand(i);
24288
24289 if (Op.isUndef())
24290 continue;
24291
24292 // Check if this is the identity extract:
24293 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24294 return SDValue();
24295
24296 // Find the single incoming vector for the extract_subvector.
24297 if (SingleSource.getNode()) {
24298 if (Op.getOperand(0) != SingleSource)
24299 return SDValue();
24300 } else {
24301 SingleSource = Op.getOperand(0);
24302
24303 // Check the source type is the same as the type of the result.
24304 // If not, this concat may extend the vector, so we can not
24305 // optimize it away.
24306 if (SingleSource.getValueType() != N->getValueType(0))
24307 return SDValue();
24308 }
24309
24310 // Check that we are reading from the identity index.
24311 unsigned IdentityIndex = i * PartNumElem;
24312 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24313 return SDValue();
24314 }
24315
24316 if (SingleSource.getNode())
24317 return SingleSource;
24318
24319 return SDValue();
24320}
24321
24322// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24323// if the subvector can be sourced for free.
24324static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24325 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24326 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24327 return V.getOperand(1);
24328 }
24329 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24330 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24331 V.getOperand(0).getValueType() == SubVT &&
24332 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24333 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24334 return V.getOperand(SubIdx);
24335 }
24336 return SDValue();
24337}
24338
24339static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24340 SelectionDAG &DAG,
24341 bool LegalOperations) {
24342 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24343 SDValue BinOp = Extract->getOperand(0);
24344 unsigned BinOpcode = BinOp.getOpcode();
24345 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24346 return SDValue();
24347
24348 EVT VecVT = BinOp.getValueType();
24349 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24350 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24351 return SDValue();
24352
24353 SDValue Index = Extract->getOperand(1);
24354 EVT SubVT = Extract->getValueType(0);
24355 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24356 return SDValue();
24357
24358 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24359 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24360
24361 // TODO: We could handle the case where only 1 operand is being inserted by
24362 // creating an extract of the other operand, but that requires checking
24363 // number of uses and/or costs.
24364 if (!Sub0 || !Sub1)
24365 return SDValue();
24366
24367 // We are inserting both operands of the wide binop only to extract back
24368 // to the narrow vector size. Eliminate all of the insert/extract:
24369 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24370 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24371 BinOp->getFlags());
24372}
24373
24374/// If we are extracting a subvector produced by a wide binary operator try
24375/// to use a narrow binary operator and/or avoid concatenation and extraction.
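/// Illustrative example (a sketch; think of an AVX1-like target where the
/// 256-bit integer 'and' is not natively supported but the 128-bit one is):
///   (v4i32 extract_subvector (v8i32 and (concat A, B), (concat C, D)), 4)
///     --> (v4i32 and B, D)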
24376static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24377 bool LegalOperations) {
24378 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24379 // some of these bailouts with other transforms.
24380
24381 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24382 return V;
24383
24384 // The extract index must be a constant, so we can map it to a concat operand.
24385 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24386 if (!ExtractIndexC)
24387 return SDValue();
24388
24389 // We are looking for an optionally bitcasted wide vector binary operator
24390 // feeding an extract subvector.
24391 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24392 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24393 unsigned BOpcode = BinOp.getOpcode();
24394 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24395 return SDValue();
24396
24397 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24398 // reduced to the unary fneg when it is visited, and we probably want to deal
24399 // with fneg in a target-specific way.
24400 if (BOpcode == ISD::FSUB) {
24401 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24402 if (C && C->getValueAPF().isNegZero())
24403 return SDValue();
24404 }
24405
24406 // The binop must be a vector type, so we can extract some fraction of it.
24407 EVT WideBVT = BinOp.getValueType();
24408 // The optimisations below currently assume we are dealing with fixed length
24409 // vectors. It is possible to add support for scalable vectors, but at the
24410 // moment we've done no analysis to prove whether they are profitable or not.
24411 if (!WideBVT.isFixedLengthVector())
24412 return SDValue();
24413
24414 EVT VT = Extract->getValueType(0);
24415 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24416 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24417 "Extract index is not a multiple of the vector length.");
24418
24419 // Bail out if this is not a proper multiple width extraction.
24420 unsigned WideWidth = WideBVT.getSizeInBits();
24421 unsigned NarrowWidth = VT.getSizeInBits();
24422 if (WideWidth % NarrowWidth != 0)
24423 return SDValue();
24424
24425 // Bail out if we are extracting a fraction of a single operation. This can
24426 // occur because we potentially looked through a bitcast of the binop.
24427 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24428 unsigned WideNumElts = WideBVT.getVectorNumElements();
24429 if (WideNumElts % NarrowingRatio != 0)
24430 return SDValue();
24431
24432 // Bail out if the target does not support a narrower version of the binop.
24433 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24434 WideNumElts / NarrowingRatio);
24435 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24436 LegalOperations))
24437 return SDValue();
24438
24439 // If extraction is cheap, we don't need to look at the binop operands
24440 // for concat ops. The narrow binop alone makes this transform profitable.
24441 // We can't just reuse the original extract index operand because we may have
24442 // bitcasted.
24443 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24444 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24445 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24446 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24447 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24448 SDLoc DL(Extract);
24449 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24450 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24451 BinOp.getOperand(0), NewExtIndex);
24452 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24453 BinOp.getOperand(1), NewExtIndex);
24454 SDValue NarrowBinOp =
24455 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24456 return DAG.getBitcast(VT, NarrowBinOp);
24457 }
24458
24459 // Only handle the case where we are doubling and then halving. A larger ratio
24460 // may require more than two narrow binops to replace the wide binop.
24461 if (NarrowingRatio != 2)
24462 return SDValue();
24463
24464 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24465 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24466 // flavors, but no other 256-bit integer support. This could be extended to
24467 // handle any binop, but that may require fixing/adding other folds to avoid
24468 // codegen regressions.
24469 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24470 return SDValue();
24471
24472 // We need at least one concatenation operation of a binop operand to make
24473 // this transform worthwhile. The concat must double the input vector sizes.
24474 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24475 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24476 return V.getOperand(ConcatOpNum);
24477 return SDValue();
24478 };
24479 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24480 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24481
24482 if (SubVecL || SubVecR) {
24483 // If a binop operand was not the result of a concat, we must extract a
24484 // half-sized operand for our new narrow binop:
24485 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24486 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24487 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24488 SDLoc DL(Extract);
24489 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24490 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24491 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24492 BinOp.getOperand(0), IndexC);
24493
24494 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24495 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24496 BinOp.getOperand(1), IndexC);
24497
24498 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24499 return DAG.getBitcast(VT, NarrowBinOp);
24500 }
24501
24502 return SDValue();
24503}
24504
24505/// If we are extracting a subvector from a wide vector load, convert to a
24506/// narrow load to eliminate the extraction:
24507/// (extract_subvector (load wide vector)) --> (load narrow vector)
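/// Illustrative example (a sketch; little-endian, simple non-extending load,
/// and the target allows reducing the load width):
///   (v4i32 extract_subvector (v8i32 load %p), 4) --> (v4i32 load %p + 16)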
24508static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24509 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24510 if (DAG.getDataLayout().isBigEndian())
24511 return SDValue();
24512
24513 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24514 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24515 return SDValue();
24516
24517 // Allow targets to opt-out.
24518 EVT VT = Extract->getValueType(0);
24519
24520 // We can only create byte sized loads.
24521 if (!VT.isByteSized())
24522 return SDValue();
24523
24524 unsigned Index = Extract->getConstantOperandVal(1);
24525 unsigned NumElts = VT.getVectorMinNumElements();
24526 // A fixed length vector being extracted from a scalable vector
24527 // may not be any *smaller* than the scalable one.
24528 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24529 return SDValue();
24530
24531 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24532 // multiple of the minimum number of elements in the result type.
24533 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24534 "multiple of the result's element count");
24535
24536 // It's fine to use TypeSize here as we know the offset will not be negative.
24537 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24538
24539 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24540 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24541 return SDValue();
24542
24543 // The narrow load will be offset from the base address of the old load if
24544 // we are extracting from something besides index 0 (little-endian).
24545 SDLoc DL(Extract);
24546
24547 // TODO: Use "BaseIndexOffset" to make this more effective.
24548 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24549
24549
24550 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24551 MachineFunction &MF = DAG.getMachineFunction();
24552 MachineMemOperand *MMO;
24553 if (Offset.isScalable()) {
24554 MachinePointerInfo MPI =
24555 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24556 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24557 } else
24558 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24559 StoreSize);
24560
24561 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24562 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24563 return NewLd;
24564}
24565
24566/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24567/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24568/// EXTRACT_SUBVECTOR(Op?, ?),
24569/// Mask'))
24570/// iff it is legal and profitable to do so. Notably, the trimmed mask
24571/// (containing only the elements that are extracted)
24572/// must reference at most two subvectors.
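/// Illustrative example (a sketch; X, Y : v8i32, extracting the low v4i32
/// half, and the narrow shuffle mask is legal for the target):
///   (v4i32 extract_subvector (vector_shuffle<0,8,1,9,2,10,3,11> X, Y), 0)
///     --> vector_shuffle<0,4,1,5> (v4i32 extract_subvector X, 0),
///                                 (v4i32 extract_subvector Y, 0)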
24573static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24574 SelectionDAG &DAG,
24575 const TargetLowering &TLI,
24576 bool LegalOperations) {
24577 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24578 "Must only be called on EXTRACT_SUBVECTOR's");
24579
24580 SDValue N0 = N->getOperand(0);
24581
24582 // Only deal with non-scalable vectors.
24583 EVT NarrowVT = N->getValueType(0);
24584 EVT WideVT = N0.getValueType();
24585 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24586 return SDValue();
24587
24588 // The operand must be a shufflevector.
24589 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24590 if (!WideShuffleVector)
24591 return SDValue();
24592
24593 // The old shuffle needs to go away.
24594 if (!WideShuffleVector->hasOneUse())
24595 return SDValue();
24596
24597 // And the narrow shufflevector that we'll form must be legal.
24598 if (LegalOperations &&
24599 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24600 return SDValue();
24601
24602 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24603 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24604 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24605 "Extract index is not a multiple of the output vector length.");
24606
24607 int WideNumElts = WideVT.getVectorNumElements();
24608
24609 SmallVector<int, 16> NewMask;
24610 NewMask.reserve(NumEltsExtracted);
24611 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24612 DemandedSubvectors;
24613
24614 // Try to decode the wide mask into narrow mask from at most two subvectors.
24615 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24616 NumEltsExtracted)) {
24617 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24618 "Out-of-bounds shuffle mask?");
24619
24620 if (M < 0) {
24621 // Does not depend on operands, does not require adjustment.
24622 NewMask.emplace_back(M);
24623 continue;
24624 }
24625
24626 // From which operand of the shuffle does this shuffle mask element pick?
24627 int WideShufOpIdx = M / WideNumElts;
24628 // Which element of that operand is picked?
24629 int OpEltIdx = M % WideNumElts;
24630
24631 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24632 "Shuffle mask vector decomposition failure.");
24633
24634 // And which NumEltsExtracted-sized subvector of that operand is that?
24635 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24636 // And which element within that subvector of that operand is that?
24637 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24638
24639 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24640 "Shuffle mask subvector decomposition failure.");
24641
24642 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24643 WideShufOpIdx * WideNumElts) == M &&
24644 "Shuffle mask full decomposition failure.");
24645
24646 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24647
24648 if (Op.isUndef()) {
24649 // Picking from an undef operand. Let's adjust mask instead.
24650 NewMask.emplace_back(-1);
24651 continue;
24652 }
24653
24654 const std::pair<SDValue, int> DemandedSubvector =
24655 std::make_pair(Op, OpSubvecIdx);
24656
24657 if (DemandedSubvectors.insert(DemandedSubvector)) {
24658 if (DemandedSubvectors.size() > 2)
24659 return SDValue(); // We can't handle more than two subvectors.
24660 // How many elements into the WideVT does this subvector start?
24661 int Index = NumEltsExtracted * OpSubvecIdx;
24662 // Bail out if the extraction isn't going to be cheap.
24663 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24664 return SDValue();
24665 }
24666
24667 // Ok, but from which operand of the new shuffle will this element pick?
24668 int NewOpIdx =
24669 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24670 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24671
24672 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24673 NewMask.emplace_back(AdjM);
24674 }
24675 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24676 assert(DemandedSubvectors.size() <= 2 &&
24677 "Should have ended up demanding at most two subvectors.");
24678
24679 // Did we discover that the shuffle does not actually depend on operands?
24680 if (DemandedSubvectors.empty())
24681 return DAG.getUNDEF(NarrowVT);
24682
24683 // Profitability check: only deal with extractions from the first subvector
24684 // unless the new mask is an identity mask with no undef elements.
24685 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24686 any_of(NewMask, [](int M) { return M < 0; }))
24687 for (auto &DemandedSubvector : DemandedSubvectors)
24688 if (DemandedSubvector.second != 0)
24689 return SDValue();
24690
24691 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24692 // operand[s]/index[es], so there is no point in checking its legality.
24693
24694 // Do not turn a legal shuffle into an illegal one.
24695 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24696 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24697 return SDValue();
24698
24699 SDLoc DL(N);
24700
24701 SmallVector<SDValue, 2> NewOps;
24702 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24703 &DemandedSubvector : DemandedSubvectors) {
24704 // How many elements into the WideVT does this subvector start?
24705 int Index = NumEltsExtracted * DemandedSubvector.second;
24706 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24707 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24708 DemandedSubvector.first, IndexC));
24709 }
24710 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24711 "Should end up with either one or two ops");
24712
24713 // If we ended up with only one operand, pad with an undef.
24714 if (NewOps.size() == 1)
24715 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24716
24717 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24718}
24719
24720SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24721 EVT NVT = N->getValueType(0);
24722 SDValue V = N->getOperand(0);
24723 uint64_t ExtIdx = N->getConstantOperandVal(1);
24724 SDLoc DL(N);
24725
24726 // Extract from UNDEF is UNDEF.
24727 if (V.isUndef())
24728 return DAG.getUNDEF(NVT);
24729
24730 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24731 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24732 return NarrowLoad;
24733
24734 // Combine an extract of an extract into a single extract_subvector.
24735 // ext (ext X, C), 0 --> ext X, C
24736 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24737 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24738 V.getConstantOperandVal(1)) &&
24739 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24740 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24741 V.getOperand(1));
24742 }
24743 }
24744
24745 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24746 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24747 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24748 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24749 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24750
24751 // extract_subvector(insert_subvector(x,y,c1),c2)
24752 // --> extract_subvector(y,c2-c1)
24753 // iff we're just extracting from the inserted subvector.
24754 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24755 SDValue InsSub = V.getOperand(1);
24756 EVT InsSubVT = InsSub.getValueType();
24757 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24758 unsigned InsIdx = V.getConstantOperandVal(2);
24759 unsigned NumSubElts = NVT.getVectorMinNumElements();
24760 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24761 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24762 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24763 V.getValueType().isFixedLengthVector())
24764 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24765 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24766 }
24767
24768 // Try to move vector bitcast after extract_subv by scaling extraction index:
24769 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24770 if (V.getOpcode() == ISD::BITCAST &&
24771 V.getOperand(0).getValueType().isVector() &&
24772 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24773 SDValue SrcOp = V.getOperand(0);
24774 EVT SrcVT = SrcOp.getValueType();
24775 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24776 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24777 if ((SrcNumElts % DestNumElts) == 0) {
24778 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24779 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24780 EVT NewExtVT =
24781 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24782 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24783 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24784 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24785 V.getOperand(0), NewIndex);
24786 return DAG.getBitcast(NVT, NewExtract);
24787 }
24788 }
24789 if ((DestNumElts % SrcNumElts) == 0) {
24790 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24791 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24792 ElementCount NewExtEC =
24793 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24794 EVT ScalarVT = SrcVT.getScalarType();
24795 if ((ExtIdx % DestSrcRatio) == 0) {
24796 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24797 EVT NewExtVT =
24798 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24799 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24800 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24801 SDValue NewExtract =
24802 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24803 V.getOperand(0), NewIndex);
24804 return DAG.getBitcast(NVT, NewExtract);
24805 }
24806 if (NewExtEC.isScalar() &&
24807 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24808 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24809 SDValue NewExtract =
24810 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24811 V.getOperand(0), NewIndex);
24812 return DAG.getBitcast(NVT, NewExtract);
24813 }
24814 }
24815 }
24816 }
24817 }
24818
24819 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24820 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24821 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24822 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24823 "Concat and extract subvector do not change element type");
24824 assert((ExtIdx % ExtNumElts) == 0 &&
24825 "Extract index is not a multiple of the input vector length.");
24826
24827 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24828 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24829
24830 // If the concatenated source types match this extract, it's a direct
24831 // simplification:
24832 // extract_subvec (concat V1, V2, ...), i --> Vi
24833 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24834 return V.getOperand(ConcatOpIdx);
24835
24836 // If the concatenated source vectors are a whole multiple of the length of
24837 // this extract, then extract a fraction of one of those source vectors
24838 // directly from a concat operand. Example:
24839 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24840 // v2i8 extract_subvec v8i8 Y, 6
24841 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24842 ConcatSrcNumElts % ExtNumElts == 0) {
24843 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24844 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24845 "Trying to extract from >1 concat operand?");
24846 assert(NewExtIdx % ExtNumElts == 0 &&
24847 "Extract index is not a multiple of the input vector length.");
24848 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24849 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24850 V.getOperand(ConcatOpIdx), NewIndexC);
24851 }
24852 }
24853
24854 if (SDValue V =
24855 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24856 return V;
24857
24858 V = peekThroughBitcasts(V);
24859
24860 // If the input is a build vector, try to make a smaller build vector.
24861 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24862 EVT InVT = V.getValueType();
24863 unsigned ExtractSize = NVT.getSizeInBits();
24864 unsigned EltSize = InVT.getScalarSizeInBits();
24865 // Only do this if we won't split any elements.
24866 if (ExtractSize % EltSize == 0) {
24867 unsigned NumElems = ExtractSize / EltSize;
24868 EVT EltVT = InVT.getVectorElementType();
24869 EVT ExtractVT =
24870 NumElems == 1 ? EltVT
24871 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24872 if ((Level < AfterLegalizeDAG ||
24873 (NumElems == 1 ||
24874 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24875 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24876 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24877
24878 if (NumElems == 1) {
24879 SDValue Src = V->getOperand(IdxVal);
24880 if (EltVT != Src.getValueType())
24881 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24882 return DAG.getBitcast(NVT, Src);
24883 }
24884
24885 // Extract the pieces from the original build_vector.
24886 SDValue BuildVec =
24887 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24888 return DAG.getBitcast(NVT, BuildVec);
24889 }
24890 }
24891 }
24892
24893 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24894 // Handle only simple case where vector being inserted and vector
24895 // being extracted are of same size.
24896 EVT SmallVT = V.getOperand(1).getValueType();
24897 if (!NVT.bitsEq(SmallVT))
24898 return SDValue();
24899
24900 // Combine:
24901 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24902 // Into:
24903 // indices are equal or bit offsets are equal => V1
24904 // otherwise => (extract_subvec V1, ExtIdx)
24905 uint64_t InsIdx = V.getConstantOperandVal(2);
24906 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24907 ExtIdx * NVT.getScalarSizeInBits()) {
24908 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24909 return SDValue();
24910
24911 return DAG.getBitcast(NVT, V.getOperand(1));
24912 }
24913 return DAG.getNode(
24914 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24915 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24916 N->getOperand(1));
24917 }
24918
24919 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24920 return NarrowBOp;
24921
24922 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24923 return SDValue(N, 0);
24924
24925 return SDValue();
24926}
24927
24928/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24929/// followed by concatenation. Narrow vector ops may have better performance
24930/// than wide ops, and this can unlock further narrowing of other vector ops.
24931/// Targets can invert this transform later if it is not profitable.
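/// Illustrative example (a sketch; X, Y : v4i32 and both half-width shuffle
/// masks are legal for the target):
///   vector_shuffle<0,8,1,9,2,10,3,11> (concat X, undef), (concat Y, undef)
///     --> concat (vector_shuffle<0,4,1,5> X, Y), (vector_shuffle<2,6,3,7> X, Y)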
24932static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24933 SelectionDAG &DAG) {
24934 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24935 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24936 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24937 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24938 return SDValue();
24939
24940 // Split the wide shuffle mask into halves. Any mask element that is accessing
24941 // operand 1 is offset down to account for narrowing of the vectors.
24942 ArrayRef<int> Mask = Shuf->getMask();
24943 EVT VT = Shuf->getValueType(0);
24944 unsigned NumElts = VT.getVectorNumElements();
24945 unsigned HalfNumElts = NumElts / 2;
24946 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24947 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24948 for (unsigned i = 0; i != NumElts; ++i) {
24949 if (Mask[i] == -1)
24950 continue;
24951 // If we reference the upper (undef) subvector then the element is undef.
24952 if ((Mask[i] % NumElts) >= HalfNumElts)
24953 continue;
24954 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24955 if (i < HalfNumElts)
24956 Mask0[i] = M;
24957 else
24958 Mask1[i - HalfNumElts] = M;
24959 }
24960
24961 // Ask the target if this is a valid transform.
24962 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24963 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24964 HalfNumElts);
24965 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24966 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24967 return SDValue();
24968
24969 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24970 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24971 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24972 SDLoc DL(Shuf);
24973 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24974 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24975 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24976}
24977
24978// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24979// or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
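// Illustrative example (a sketch; A, B, C, D : v4i32, result : v8i32):
//   vector_shuffle<4,5,6,7,8,9,10,11> (concat A, B), (concat C, D)
//     --> concat B, C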
24980static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24981 EVT VT = N->getValueType(0);
24982 unsigned NumElts = VT.getVectorNumElements();
24983
24984 SDValue N0 = N->getOperand(0);
24985 SDValue N1 = N->getOperand(1);
24986 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24987 ArrayRef<int> Mask = SVN->getMask();
24988
24989 SmallVector<SDValue, 4> Ops;
24990 EVT ConcatVT = N0.getOperand(0).getValueType();
24991 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24992 unsigned NumConcats = NumElts / NumElemsPerConcat;
24993
24994 auto IsUndefMaskElt = [](int i) { return i == -1; };
24995
24996 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24997 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24998 // half vector elements.
24999 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25000 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25001 IsUndefMaskElt)) {
25002 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25003 N0.getOperand(1),
25004 Mask.slice(0, NumElemsPerConcat));
25005 N1 = DAG.getUNDEF(ConcatVT);
25006 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25007 }
25008
25009 // Look at every vector that's inserted. We're looking for exact
25010 // subvector-sized copies from a concatenated vector
25011 for (unsigned I = 0; I != NumConcats; ++I) {
25012 unsigned Begin = I * NumElemsPerConcat;
25013 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25014
25015 // Make sure we're dealing with a copy.
25016 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25017 Ops.push_back(DAG.getUNDEF(ConcatVT));
25018 continue;
25019 }
25020
25021 int OpIdx = -1;
25022 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25023 if (IsUndefMaskElt(SubMask[i]))
25024 continue;
25025 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25026 return SDValue();
25027 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25028 if (0 <= OpIdx && EltOpIdx != OpIdx)
25029 return SDValue();
25030 OpIdx = EltOpIdx;
25031 }
25032 assert(0 <= OpIdx && "Unknown concat_vectors op");
25033
25034 if (OpIdx < (int)N0.getNumOperands())
25035 Ops.push_back(N0.getOperand(OpIdx));
25036 else
25037 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25038 }
25039
25040 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25041}
25042
25043// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25044// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25045//
25046// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25047// a simplification in some sense, but it isn't appropriate in general: some
25048// BUILD_VECTORs are substantially cheaper than others. The general case
25049// of a BUILD_VECTOR requires inserting each element individually (or
25050// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25051// all constants is a single constant pool load. A BUILD_VECTOR where each
25052// element is identical is a splat. A BUILD_VECTOR where most of the operands
25053// are undef lowers to a small number of element insertions.
25054//
25055// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25056// We don't fold shuffles where one side is a non-zero constant, and we don't
25057// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25058// non-constant operands. This seems to work out reasonably well in practice.
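// Illustrative example (a sketch; the all-constant case, which is always
// profitable since it folds to a single constant pool load, assuming the
// build_vector has no other uses):
//   vector_shuffle<3,1,0,2> (build_vector C0,C1,C2,C3), undef
//     --> build_vector C3,C1,C0,C2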
25059static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25060 SelectionDAG &DAG,
25061 const TargetLowering &TLI) {
25062 EVT VT = SVN->getValueType(0);
25063 unsigned NumElts = VT.getVectorNumElements();
25064 SDValue N0 = SVN->getOperand(0);
25065 SDValue N1 = SVN->getOperand(1);
25066
25067 if (!N0->hasOneUse())
25068 return SDValue();
25069
25070 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
25071 // discussed above.
25072 if (!N1.isUndef()) {
25073 if (!N1->hasOneUse())
25074 return SDValue();
25075
25076 bool N0AnyConst = isAnyConstantBuildVector(N0);
25077 bool N1AnyConst = isAnyConstantBuildVector(N1);
25078 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25079 return SDValue();
25080 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25081 return SDValue();
25082 }
25083
25084 // If both inputs are splats of the same value then we can safely merge this
25085 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25086 bool IsSplat = false;
25087 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25088 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25089 if (BV0 && BV1)
25090 if (SDValue Splat0 = BV0->getSplatValue())
25091 IsSplat = (Splat0 == BV1->getSplatValue());
25092
25093 SmallVector<SDValue, 8> Ops;
25094 SmallSet<SDValue, 16> DuplicateOps;
25095 for (int M : SVN->getMask()) {
25096 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25097 if (M >= 0) {
25098 int Idx = M < (int)NumElts ? M : M - NumElts;
25099 SDValue &S = (M < (int)NumElts ? N0 : N1);
25100 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25101 Op = S.getOperand(Idx);
25102 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25103 SDValue Op0 = S.getOperand(0);
25104 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25105 } else {
25106 // Operand can't be combined - bail out.
25107 return SDValue();
25108 }
25109 }
25110
25111 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25112 // generating a splat; semantically, this is fine, but it's likely to
25113 // generate low-quality code if the target can't reconstruct an appropriate
25114 // shuffle.
25115 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25116 if (!IsSplat && !DuplicateOps.insert(Op).second)
25117 return SDValue();
25118
25119 Ops.push_back(Op);
25120 }
25121
25122 // BUILD_VECTOR requires all inputs to be of the same type, find the
25123 // maximum type and extend them all.
25124 EVT SVT = VT.getScalarType();
25125 if (SVT.isInteger())
25126 for (SDValue &Op : Ops)
25127 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25128 if (SVT != VT.getScalarType())
25129 for (SDValue &Op : Ops)
25130 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25131 : (TLI.isZExtFree(Op.getValueType(), SVT)
25132 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25133 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25134 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25135}
25136
25137// Match shuffles that can be converted to *_vector_extend_in_reg.
25138// This is often generated during legalization.
25139// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25140// and returns the EVT to which the extension should be performed.
25141// NOTE: this assumes that the src is the first operand of the shuffle.
25142static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25143 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25144 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25145 bool LegalOperations) {
25146 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25147
25148 // TODO Add support for big-endian when we have a test case.
25149 if (!VT.isInteger() || IsBigEndian)
25150 return std::nullopt;
25151
25152 unsigned NumElts = VT.getVectorNumElements();
25153 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25154
25155 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
25156 // power-of-2 extensions as they are the most likely.
25157 // FIXME: should try Scale == NumElts case too,
25158 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25159 // The vector width must be a multiple of Scale.
25160 if (NumElts % Scale != 0)
25161 continue;
25162
25163 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25164 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25165
25166 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25167 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25168 continue;
25169
25170 if (Match(Scale))
25171 return OutVT;
25172 }
25173
25174 return std::nullopt;
25175}
25176
25177// Match shuffles that can be converted to any_vector_extend_in_reg.
25178// This is often generated during legalization.
25179// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25180static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25181 SelectionDAG &DAG,
25182 const TargetLowering &TLI,
25183 bool LegalOperations) {
25184 EVT VT = SVN->getValueType(0);
25185 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25186
25187 // TODO Add support for big-endian when we have a test case.
25188 if (!VT.isInteger() || IsBigEndian)
25189 return SDValue();
25190
25191 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25192 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25193 Mask = SVN->getMask()](unsigned Scale) {
25194 for (unsigned i = 0; i != NumElts; ++i) {
25195 if (Mask[i] < 0)
25196 continue;
25197 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25198 continue;
25199 return false;
25200 }
25201 return true;
25202 };
25203
25204 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25205 SDValue N0 = SVN->getOperand(0);
25206 // Never create an illegal type. Only create unsupported operations if we
25207 // are pre-legalization.
25208 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25209 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25210 if (!OutVT)
25211 return SDValue();
25212 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25213}
25214
25215// Match shuffles that can be converted to zero_extend_vector_inreg.
25216// This is often generated during legalization.
25217// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25218static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25219 SelectionDAG &DAG,
25220 const TargetLowering &TLI,
25221 bool LegalOperations) {
25222 bool LegalTypes = true;
25223 EVT VT = SVN->getValueType(0);
25224 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25225 unsigned NumElts = VT.getVectorNumElements();
25226 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25227
25228 // TODO: add support for big-endian when we have a test case.
25229 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25230 if (!VT.isInteger() || IsBigEndian)
25231 return SDValue();
25232
25233 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25234 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25235 for (int &Indice : Mask) {
25236 if (Indice < 0)
25237 continue;
25238 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25239 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25240 Fn(Indice, OpIdx, OpEltIdx);
25241 }
25242 };
25243
25244 // Which elements of which operand does this shuffle demand?
25245 std::array<APInt, 2> OpsDemandedElts;
25246 for (APInt &OpDemandedElts : OpsDemandedElts)
25247 OpDemandedElts = APInt::getZero(NumElts);
25248 ForEachDecomposedIndice(
25249 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25250 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25251 });
25252
25253 // Element-wise(!), which of these demanded elements are known to be zero?
25254 std::array<APInt, 2> OpsKnownZeroElts;
25255 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25256 std::get<2>(I) =
25257 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25258
25259 // Manifest zeroable element knowledge in the shuffle mask.
25260 // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
25261 // this is a local invention, but it won't leak into the DAG.
25262 // FIXME: should we not manifest them, but just check when matching?
25263 bool HadZeroableElts = false;
25264 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25265 int &Indice, int OpIdx, int OpEltIdx) {
25266 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25267 Indice = -2; // Zeroable element.
25268 HadZeroableElts = true;
25269 }
25270 });
25271
25272 // Don't proceed unless we've refined at least one zeroable mask index.
25273 // If we didn't, then we are still trying to match the same shuffle mask
25274 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25275 // and evidently failed. Proceeding will lead to endless combine loops.
25276 if (!HadZeroableElts)
25277 return SDValue();
25278
25279 // The shuffle may be more fine-grained than we want. Widen elements first.
25280 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25281 SmallVector<int, 16> ScaledMask;
25282 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25283 assert(Mask.size() >= ScaledMask.size() &&
25284 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25285 int Prescale = Mask.size() / ScaledMask.size();
25286
25287 NumElts = ScaledMask.size();
25288 EltSizeInBits *= Prescale;
25289
25290 EVT PrescaledVT = EVT::getVectorVT(
25291 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25292 NumElts);
25293
25294 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25295 return SDValue();
25296
25297 // For example,
25298 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25299 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25300 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25301 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25302 "Unexpected mask scaling factor.");
25303 ArrayRef<int> Mask = ScaledMask;
25304 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25305 SrcElt != NumSrcElts; ++SrcElt) {
25306 // Analyze the shuffle mask in Scale-sized chunks.
25307 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25308 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25309 Mask = Mask.drop_front(MaskChunk.size());
25310 // The first index in this chunk must be SrcElt, but not zero!
25311 // FIXME: undef should be fine, but that results in a more-defined result.
25312 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25313 return false;
25314 // The rest of the indices in this chunk must be zeros.
25315 // FIXME: undef should be fine, but that results in a more-defined result.
25316 if (!all_of(MaskChunk.drop_front(1),
25317 [](int Indice) { return Indice == -2; }))
25318 return false;
25319 }
25320 assert(Mask.empty() && "Did not process the whole mask?");
25321 return true;
25322 };
25323
25324 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25325 for (bool Commuted : {false, true}) {
25326 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25327 if (Commuted)
25328 ShuffleVectorSDNode::commuteMask(ScaledMask);
25329 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25330 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25331 LegalOperations);
25332 if (OutVT)
25333 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25334 DAG.getBitcast(PrescaledVT, Op)));
25335 }
25336 return SDValue();
25337}
25338
25339// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25340// each source element of a large type into the lowest elements of a smaller
25341// destination type. This is often generated during legalization.
25342 // If the source node itself was a '*_extend_vector_inreg' node then we
25343 // should be able to remove it.
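// Illustrative example (a sketch; X : v4i32, little-endian):
//   vector_shuffle<0,2,-1,-1> (v4i32 bitcast (v2i64 zero_extend_vector_inreg X)),
//                             undef
//     --> X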
25344static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25345 SelectionDAG &DAG) {
25346 EVT VT = SVN->getValueType(0);
25347 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25348
25349 // TODO Add support for big-endian when we have a test case.
25350 if (!VT.isInteger() || IsBigEndian)
25351 return SDValue();
25352
25353 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25354
25355 unsigned Opcode = N0.getOpcode();
25356 if (!ISD::isExtVecInRegOpcode(Opcode))
25357 return SDValue();
25358
25359 SDValue N00 = N0.getOperand(0);
25360 ArrayRef<int> Mask = SVN->getMask();
25361 unsigned NumElts = VT.getVectorNumElements();
25362 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25363 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25364 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25365
25366 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25367 return SDValue();
25368 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25369
25370   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25371 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25372 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25373 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25374 for (unsigned i = 0; i != NumElts; ++i) {
25375 if (Mask[i] < 0)
25376 continue;
25377 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25378 continue;
25379 return false;
25380 }
25381 return true;
25382 };
25383
25384 // At the moment we just handle the case where we've truncated back to the
25385 // same size as before the extension.
25386 // TODO: handle more extension/truncation cases as cases arise.
25387 if (EltSizeInBits != ExtSrcSizeInBits)
25388 return SDValue();
25389
25390 // We can remove *extend_vector_inreg only if the truncation happens at
25391 // the same scale as the extension.
25392 if (isTruncate(ExtScale))
25393 return DAG.getBitcast(VT, N00);
25394
25395 return SDValue();
25396}
25397
25398// Combine shuffles of splat-shuffles of the form:
25399// shuffle (shuffle V, undef, splat-mask), undef, M
25400// If splat-mask contains undef elements, we need to be careful about
25401 // introducing undefs in the folded mask which are not the result of composing
25402// the masks of the shuffles.
25403 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25404                                         SelectionDAG &DAG) {
25405 EVT VT = Shuf->getValueType(0);
25406 unsigned NumElts = VT.getVectorNumElements();
25407
25408 if (!Shuf->getOperand(1).isUndef())
25409 return SDValue();
25410
25411 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25412 // in disguise, with all demanded elements being identical.
25413 // FIXME: this can be done per-operand.
25414 if (!Shuf->isSplat()) {
25415 APInt DemandedElts(NumElts, 0);
25416 for (int Idx : Shuf->getMask()) {
25417 if (Idx < 0)
25418 continue; // Ignore sentinel indices.
25419 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25420 DemandedElts.setBit(Idx);
25421 }
25422 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25423 APInt UndefElts;
25424 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25425 // Even if all demanded elements are splat, some of them could be undef.
25426 // Which lowest demanded element is *not* known-undef?
25427 std::optional<unsigned> MinNonUndefIdx;
25428 for (int Idx : Shuf->getMask()) {
25429 if (Idx < 0 || UndefElts[Idx])
25430 continue; // Ignore sentinel indices, and undef elements.
25431 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25432 }
25433 if (!MinNonUndefIdx)
25434 return DAG.getUNDEF(VT); // All undef - result is undef.
25435 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25436 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25437 Shuf->getMask().end());
25438 for (int &Idx : SplatMask) {
25439 if (Idx < 0)
25440 continue; // Passthrough sentinel indices.
25441 // Otherwise, just pick the lowest demanded non-undef element.
25442 // Or sentinel undef, if we know we'd pick a known-undef element.
25443 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25444 }
25445 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25446 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25447 Shuf->getOperand(1), SplatMask);
25448 }
25449 }
25450
25451 // If the inner operand is a known splat with no undefs, just return that directly.
25452 // TODO: Create DemandedElts mask from Shuf's mask.
25453 // TODO: Allow undef elements and merge with the shuffle code below.
25454 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25455 return Shuf->getOperand(0);
25456
25457 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25458 if (!Splat || !Splat->isSplat())
25459 return SDValue();
25460
25461 ArrayRef<int> ShufMask = Shuf->getMask();
25462 ArrayRef<int> SplatMask = Splat->getMask();
25463 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25464
25465 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25466 // every undef mask element in the splat-shuffle has a corresponding undef
25467 // element in the user-shuffle's mask or if the composition of mask elements
25468 // would result in undef.
25469 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25470 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25471 // In this case it is not legal to simplify to the splat-shuffle because we
25472 // may be exposing to the users of the shuffle an undef element at index 1
25473 // which was not there before the combine.
25474 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25475 // In this case the composition of masks yields SplatMask, so it's ok to
25476 // simplify to the splat-shuffle.
25477 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25478 // In this case the composed mask includes all undef elements of SplatMask
25479 // and in addition sets element zero to undef. It is safe to simplify to
25480 // the splat-shuffle.
25481 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25482 ArrayRef<int> SplatMask) {
25483 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25484 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25485 SplatMask[UserMask[i]] != -1)
25486 return false;
25487 return true;
25488 };
25489 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25490 return Shuf->getOperand(0);
25491
25492 // Create a new shuffle with a mask that is composed of the two shuffles'
25493 // masks.
25494 SmallVector<int, 32> NewMask;
25495 for (int Idx : ShufMask)
25496 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25497
25498 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25499 Splat->getOperand(0), Splat->getOperand(1),
25500 NewMask);
25501}
25502
25503// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25504// the mask can be treated as a larger type.
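// For example (an illustrative case): with X, Y : v2i64,
//   shuffle (v4i32 bitcast X), (v4i32 bitcast Y), <0,1,6,7>
// widens to the v2i64 mask <0,3>, i.e. v4i32 bitcast (shuffle X, Y, <0,3>).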
25505 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25506                                        SelectionDAG &DAG,
25507 const TargetLowering &TLI,
25508 bool LegalOperations) {
25509 SDValue Op0 = SVN->getOperand(0);
25510 SDValue Op1 = SVN->getOperand(1);
25511 EVT VT = SVN->getValueType(0);
25512 if (Op0.getOpcode() != ISD::BITCAST)
25513 return SDValue();
25514 EVT InVT = Op0.getOperand(0).getValueType();
25515 if (!InVT.isVector() ||
25516 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25517 Op1.getOperand(0).getValueType() != InVT)))
25518 return SDValue();
25519   if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25520       (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25521 return SDValue();
25522
25523 int VTLanes = VT.getVectorNumElements();
25524 int InLanes = InVT.getVectorNumElements();
25525 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25526       (LegalOperations &&
25527        !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
25528 return SDValue();
25529 int Factor = VTLanes / InLanes;
25530
25531   // Check that each group of lanes in the mask is either undef or makes a valid
25532 // mask for the wider lane type.
25533 ArrayRef<int> Mask = SVN->getMask();
25534 SmallVector<int> NewMask;
25535 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25536 return SDValue();
25537
25538 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25539 return SDValue();
25540
25541 // Create the new shuffle with the new mask and bitcast it back to the
25542 // original type.
25543 SDLoc DL(SVN);
25544 Op0 = Op0.getOperand(0);
25545 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25546 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25547 return DAG.getBitcast(VT, NewShuf);
25548}
25549
25550/// Combine shuffle of shuffle of the form:
25551/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
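/// For example (an illustrative case):
///   shuf (shuf X, undef, <1,u,1,u>), undef, <0,2,0,2>
/// composes to the splat mask <1,1,1,1> applied to X.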
25552 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25553                                      SelectionDAG &DAG) {
25554 if (!OuterShuf->getOperand(1).isUndef())
25555 return SDValue();
25556 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25557 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25558 return SDValue();
25559
25560 ArrayRef<int> OuterMask = OuterShuf->getMask();
25561 ArrayRef<int> InnerMask = InnerShuf->getMask();
25562 unsigned NumElts = OuterMask.size();
25563 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25564 SmallVector<int, 32> CombinedMask(NumElts, -1);
25565 int SplatIndex = -1;
25566 for (unsigned i = 0; i != NumElts; ++i) {
25567 // Undef lanes remain undef.
25568 int OuterMaskElt = OuterMask[i];
25569 if (OuterMaskElt == -1)
25570 continue;
25571
25572 // Peek through the shuffle masks to get the underlying source element.
25573 int InnerMaskElt = InnerMask[OuterMaskElt];
25574 if (InnerMaskElt == -1)
25575 continue;
25576
25577 // Initialize the splatted element.
25578 if (SplatIndex == -1)
25579 SplatIndex = InnerMaskElt;
25580
25581 // Non-matching index - this is not a splat.
25582 if (SplatIndex != InnerMaskElt)
25583 return SDValue();
25584
25585 CombinedMask[i] = InnerMaskElt;
25586 }
25587 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25588 getSplatIndex(CombinedMask) != -1) &&
25589 "Expected a splat mask");
25590
25591 // TODO: The transform may be a win even if the mask is not legal.
25592 EVT VT = OuterShuf->getValueType(0);
25593 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25594 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25595 return SDValue();
25596
25597 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25598 InnerShuf->getOperand(1), CombinedMask);
25599}
25600
25601/// If the shuffle mask is taking exactly one element from the first vector
25602/// operand and passing through all other elements from the second vector
25603/// operand, return the index of the mask element that is choosing an element
25604/// from the first operand. Otherwise, return -1.
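/// For example (an illustrative case): with 4 lanes, Mask = <4,5,0,7> returns
/// 2, because lane 2 takes element 0 of the first operand while every other
/// lane passes through the matching element of the second operand.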
25605 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25606   int MaskSize = Mask.size();
25607 int EltFromOp0 = -1;
25608 // TODO: This does not match if there are undef elements in the shuffle mask.
25609 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25610 // removing an instruction (a shuffle), but losing the knowledge that some
25611 // vector lanes are not needed.
25612 for (int i = 0; i != MaskSize; ++i) {
25613 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25614 // We're looking for a shuffle of exactly one element from operand 0.
25615 if (EltFromOp0 != -1)
25616 return -1;
25617 EltFromOp0 = i;
25618 } else if (Mask[i] != i + MaskSize) {
25619 // Nothing from operand 1 can change lanes.
25620 return -1;
25621 }
25622 }
25623 return EltFromOp0;
25624}
25625
25626/// If a shuffle inserts exactly one element from a source vector operand into
25627/// another vector operand and we can access the specified element as a scalar,
25628/// then we can eliminate the shuffle.
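/// For example (an illustrative case):
///   shuffle (insertelt v1, x, 3), v2, <4,5,6,3> --> insertelt v2, x, 3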
25629 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25630                                       SelectionDAG &DAG) {
25631 // First, check if we are taking one element of a vector and shuffling that
25632 // element into another vector.
25633 ArrayRef<int> Mask = Shuf->getMask();
25634 SmallVector<int, 16> CommutedMask(Mask);
25635 SDValue Op0 = Shuf->getOperand(0);
25636 SDValue Op1 = Shuf->getOperand(1);
25637 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25638 if (ShufOp0Index == -1) {
25639 // Commute mask and check again.
25640     ShuffleVectorSDNode::commuteMask(CommutedMask);
25641     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25642 if (ShufOp0Index == -1)
25643 return SDValue();
25644 // Commute operands to match the commuted shuffle mask.
25645 std::swap(Op0, Op1);
25646 Mask = CommutedMask;
25647 }
25648
25649 // The shuffle inserts exactly one element from operand 0 into operand 1.
25650 // Now see if we can access that element as a scalar via a real insert element
25651 // instruction.
25652 // TODO: We can try harder to locate the element as a scalar. Examples: it
25653 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25654 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25655 "Shuffle mask value must be from operand 0");
25656 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25657 return SDValue();
25658
25659 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25660 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25661 return SDValue();
25662
25663 // There's an existing insertelement with constant insertion index, so we
25664 // don't need to check the legality/profitability of a replacement operation
25665 // that differs at most in the constant value. The target should be able to
25666 // lower any of those in a similar way. If not, legalization will expand this
25667 // to a scalar-to-vector plus shuffle.
25668 //
25669 // Note that the shuffle may move the scalar from the position that the insert
25670 // element used. Therefore, our new insert element occurs at the shuffle's
25671 // mask index value, not the insert's index value.
25672 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25673 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25674 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25675 Op1, Op0.getOperand(1), NewInsIndex);
25676}
25677
25678/// If we have a unary shuffle of a shuffle, see if it can be folded away
25679/// completely. This has the potential to lose undef knowledge because the first
25680/// shuffle may not have an undef mask element where the second one does. So
25681/// only call this after doing simplifications based on demanded elements.
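/// For example (an illustrative case):
///   shuf (shuf0 X, Y, <2,2,2,2>), undef, <1,3,0,2> --> shuf0 X, Y, <2,2,2,2>
/// because every outer lane selects an inner lane that reads the same source
/// element.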
25682 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25683   // shuf (shuf0 X, Y, Mask0), undef, Mask
25684 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25685 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25686 return SDValue();
25687
25688 ArrayRef<int> Mask = Shuf->getMask();
25689 ArrayRef<int> Mask0 = Shuf0->getMask();
25690 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25691 // Ignore undef elements.
25692 if (Mask[i] == -1)
25693 continue;
25694 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25695
25696 // Is the element of the shuffle operand chosen by this shuffle the same as
25697 // the element chosen by the shuffle operand itself?
25698 if (Mask0[Mask[i]] != Mask0[i])
25699 return SDValue();
25700 }
25701 // Every element of this shuffle is identical to the result of the previous
25702 // shuffle, so we can replace this value.
25703 return Shuf->getOperand(0);
25704}
25705
25706SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25707 EVT VT = N->getValueType(0);
25708 unsigned NumElts = VT.getVectorNumElements();
25709
25710 SDValue N0 = N->getOperand(0);
25711 SDValue N1 = N->getOperand(1);
25712
25713 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25714
25715 // Canonicalize shuffle undef, undef -> undef
25716 if (N0.isUndef() && N1.isUndef())
25717 return DAG.getUNDEF(VT);
25718
25719 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25720
25721 // Canonicalize shuffle v, v -> v, undef
25722 if (N0 == N1)
25723 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25724 createUnaryMask(SVN->getMask(), NumElts));
25725
25726 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25727 if (N0.isUndef())
25728 return DAG.getCommutedVectorShuffle(*SVN);
25729
25730 // Remove references to rhs if it is undef
25731 if (N1.isUndef()) {
25732 bool Changed = false;
25733 SmallVector<int, 8> NewMask;
25734 for (unsigned i = 0; i != NumElts; ++i) {
25735 int Idx = SVN->getMaskElt(i);
25736 if (Idx >= (int)NumElts) {
25737 Idx = -1;
25738 Changed = true;
25739 }
25740 NewMask.push_back(Idx);
25741 }
25742 if (Changed)
25743 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25744 }
25745
25746 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25747 return InsElt;
25748
25749 // A shuffle of a single vector that is a splatted value can always be folded.
25750 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25751 return V;
25752
25753 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25754 return V;
25755
25756 // If it is a splat, check if the argument vector is another splat or a
25757 // build_vector.
25758 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25759 int SplatIndex = SVN->getSplatIndex();
25760 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25761 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25762 // splat (vector_bo L, R), Index -->
25763 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25764 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25765 SDLoc DL(N);
25766 EVT EltVT = VT.getScalarType();
25767 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25768 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25769 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25770 SDValue NewBO =
25771 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25772 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25773       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25774       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25775 }
25776
25777 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25778 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25779 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25780 N0.hasOneUse()) {
25781 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25782 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25783
25784       if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25785         if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25786 if (Idx->getAPIntValue() == SplatIndex)
25787 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25788
25789 // Look through a bitcast if LE and splatting lane 0, through to a
25790 // scalar_to_vector or a build_vector.
25791 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25792           SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25793           (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25794            N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25795 EVT N00VT = N0.getOperand(0).getValueType();
25796 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25797 VT.isInteger() && N00VT.isInteger()) {
25798           EVT InVT =
25799               TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25800           SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25801 SDLoc(N), InVT);
25802 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25803 }
25804 }
25805 }
25806
25807 // If this is a bit convert that changes the element type of the vector but
25808 // not the number of vector elements, look through it. Be careful not to
25809   // look through conversions that change things like v4f32 to v2f64.
25810 SDNode *V = N0.getNode();
25811 if (V->getOpcode() == ISD::BITCAST) {
25812 SDValue ConvInput = V->getOperand(0);
25813 if (ConvInput.getValueType().isVector() &&
25814 ConvInput.getValueType().getVectorNumElements() == NumElts)
25815 V = ConvInput.getNode();
25816 }
25817
25818 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25819 assert(V->getNumOperands() == NumElts &&
25820 "BUILD_VECTOR has wrong number of operands");
25821 SDValue Base;
25822 bool AllSame = true;
25823 for (unsigned i = 0; i != NumElts; ++i) {
25824 if (!V->getOperand(i).isUndef()) {
25825 Base = V->getOperand(i);
25826 break;
25827 }
25828 }
25829 // Splat of <u, u, u, u>, return <u, u, u, u>
25830 if (!Base.getNode())
25831 return N0;
25832 for (unsigned i = 0; i != NumElts; ++i) {
25833 if (V->getOperand(i) != Base) {
25834 AllSame = false;
25835 break;
25836 }
25837 }
25838 // Splat of <x, x, x, x>, return <x, x, x, x>
25839 if (AllSame)
25840 return N0;
25841
25842 // Canonicalize any other splat as a build_vector.
25843 SDValue Splatted = V->getOperand(SplatIndex);
25844 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25845 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25846
25847 // We may have jumped through bitcasts, so the type of the
25848 // BUILD_VECTOR may not match the type of the shuffle.
25849 if (V->getValueType(0) != VT)
25850 NewBV = DAG.getBitcast(VT, NewBV);
25851 return NewBV;
25852 }
25853 }
25854
25855 // Simplify source operands based on shuffle mask.
25856   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25857     return SDValue(N, 0);
25858
25859 // This is intentionally placed after demanded elements simplification because
25860 // it could eliminate knowledge of undef elements created by this shuffle.
25861 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25862 return ShufOp;
25863
25864 // Match shuffles that can be converted to any_vector_extend_in_reg.
25865 if (SDValue V =
25866 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25867 return V;
25868
25869 // Combine "truncate_vector_in_reg" style shuffles.
25870 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25871 return V;
25872
25873 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25874 Level < AfterLegalizeVectorOps &&
25875 (N1.isUndef() ||
25876 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25877 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25878 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25879 return V;
25880 }
25881
25882 // A shuffle of a concat of the same narrow vector can be reduced to use
25883 // only low-half elements of a concat with undef:
25884 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
25885 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25886 N0.getNumOperands() == 2 &&
25887 N0.getOperand(0) == N0.getOperand(1)) {
25888 int HalfNumElts = (int)NumElts / 2;
25889 SmallVector<int, 8> NewMask;
25890 for (unsigned i = 0; i != NumElts; ++i) {
25891 int Idx = SVN->getMaskElt(i);
25892 if (Idx >= HalfNumElts) {
25893 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25894 Idx -= HalfNumElts;
25895 }
25896 NewMask.push_back(Idx);
25897 }
25898 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25899 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25900 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25901 N0.getOperand(0), UndefVec);
25902 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25903 }
25904 }
25905
25906 // See if we can replace a shuffle with an insert_subvector.
25907 // e.g. v2i32 into v8i32:
25908 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25909 // --> insert_subvector(lhs,rhs1,4).
25910   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25911       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25912 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25913 // Ensure RHS subvectors are legal.
25914 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25915 EVT SubVT = RHS.getOperand(0).getValueType();
25916 int NumSubVecs = RHS.getNumOperands();
25917 int NumSubElts = SubVT.getVectorNumElements();
25918 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25919 if (!TLI.isTypeLegal(SubVT))
25920 return SDValue();
25921
25922       // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25923 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25924 return SDValue();
25925
25926 // Search [NumSubElts] spans for RHS sequence.
25927 // TODO: Can we avoid nested loops to increase performance?
25928 SmallVector<int> InsertionMask(NumElts);
25929 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25930 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25931 // Reset mask to identity.
25932 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25933
25934 // Add subvector insertion.
25935 std::iota(InsertionMask.begin() + SubIdx,
25936 InsertionMask.begin() + SubIdx + NumSubElts,
25937 NumElts + (SubVec * NumSubElts));
25938
25939 // See if the shuffle mask matches the reference insertion mask.
25940 bool MatchingShuffle = true;
25941 for (int i = 0; i != (int)NumElts; ++i) {
25942 int ExpectIdx = InsertionMask[i];
25943 int ActualIdx = Mask[i];
25944 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25945 MatchingShuffle = false;
25946 break;
25947 }
25948 }
25949
25950 if (MatchingShuffle)
25951 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25952 RHS.getOperand(SubVec),
25953 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25954 }
25955 }
25956 return SDValue();
25957 };
25958 ArrayRef<int> Mask = SVN->getMask();
25959 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25960 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25961 return InsertN1;
25962 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25963 SmallVector<int> CommuteMask(Mask);
25964       ShuffleVectorSDNode::commuteMask(CommuteMask);
25965       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25966 return InsertN0;
25967 }
25968 }
25969
25970 // If we're not performing a select/blend shuffle, see if we can convert the
25971   // shuffle into an AND node, with all the out-of-lane elements known zero.
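  // For example (an illustrative case): shuffle X, Y, <0,7,2,5> where lanes 1
  // and 3 of Y are known zero becomes (and X, <-1,0,-1,0>).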
25972 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25973 bool IsInLaneMask = true;
25974 ArrayRef<int> Mask = SVN->getMask();
25975 SmallVector<int, 16> ClearMask(NumElts, -1);
25976 APInt DemandedLHS = APInt::getZero(NumElts);
25977 APInt DemandedRHS = APInt::getZero(NumElts);
25978 for (int I = 0; I != (int)NumElts; ++I) {
25979 int M = Mask[I];
25980 if (M < 0)
25981 continue;
25982 ClearMask[I] = M == I ? I : (I + NumElts);
25983 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25984 if (M != I) {
25985 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25986 Demanded.setBit(M % NumElts);
25987 }
25988 }
25989 // TODO: Should we try to mask with N1 as well?
25990 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25991 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25992 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25993       SDLoc DL(N);
25994       EVT IntVT = VT.changeVectorElementTypeToInteger();
25995       EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25996 // Transform the type to a legal type so that the buildvector constant
25997 // elements are not illegal. Make sure that the result is larger than the
25998     // original type, in case the value is split into two (e.g. i64->i32).
25999 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26000 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26001 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26002 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26003 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26004 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26005 for (int I = 0; I != (int)NumElts; ++I)
26006 if (0 <= Mask[I])
26007 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26008
26009 // See if a clear mask is legal instead of going via
26010 // XformToShuffleWithZero which loses UNDEF mask elements.
26011 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26012 return DAG.getBitcast(
26013 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26014 DAG.getConstant(0, DL, IntVT), ClearMask));
26015
26016 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26017 return DAG.getBitcast(
26018 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26019 DAG.getBuildVector(IntVT, DL, AndMask)));
26020 }
26021 }
26022 }
26023
26024 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26025 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26026 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26027 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26028 return Res;
26029
26030 // If this shuffle only has a single input that is a bitcasted shuffle,
26031 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26032 // back to their original types.
26033 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26034 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26035 TLI.isTypeLegal(VT)) {
26036
26037     SDValue BC0 = peekThroughOneUseBitcasts(N0);
26038     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26039 EVT SVT = VT.getScalarType();
26040 EVT InnerVT = BC0->getValueType(0);
26041 EVT InnerSVT = InnerVT.getScalarType();
26042
26043 // Determine which shuffle works with the smaller scalar type.
26044 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26045 EVT ScaleSVT = ScaleVT.getScalarType();
26046
26047 if (TLI.isTypeLegal(ScaleVT) &&
26048 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26049 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26050 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26051 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26052
26053 // Scale the shuffle masks to the smaller scalar type.
26054 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26055 SmallVector<int, 8> InnerMask;
26056 SmallVector<int, 8> OuterMask;
26057 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26058 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26059
26060 // Merge the shuffle masks.
26061 SmallVector<int, 8> NewMask;
26062 for (int M : OuterMask)
26063 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26064
26065 // Test for shuffle mask legality over both commutations.
26066 SDValue SV0 = BC0->getOperand(0);
26067 SDValue SV1 = BC0->getOperand(1);
26068 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26069 if (!LegalMask) {
26070 std::swap(SV0, SV1);
26071             ShuffleVectorSDNode::commuteMask(NewMask);
26072             LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26073 }
26074
26075 if (LegalMask) {
26076 SV0 = DAG.getBitcast(ScaleVT, SV0);
26077 SV1 = DAG.getBitcast(ScaleVT, SV1);
26078 return DAG.getBitcast(
26079 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26080 }
26081 }
26082 }
26083 }
26084
26085 // Match shuffles of bitcasts, so long as the mask can be treated as the
26086 // larger type.
26087 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26088 return V;
26089
26090 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26091 // operand, and SV1 as the second operand.
26092 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26093 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26094 auto MergeInnerShuffle =
26095 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26096 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26097 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26098 SmallVectorImpl<int> &Mask) -> bool {
26099 // Don't try to fold splats; they're likely to simplify somehow, or they
26100 // might be free.
26101 if (OtherSVN->isSplat())
26102 return false;
26103
26104 SV0 = SV1 = SDValue();
26105 Mask.clear();
26106
26107 for (unsigned i = 0; i != NumElts; ++i) {
26108 int Idx = SVN->getMaskElt(i);
26109 if (Idx < 0) {
26110 // Propagate Undef.
26111 Mask.push_back(Idx);
26112 continue;
26113 }
26114
26115 if (Commute)
26116 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26117
26118 SDValue CurrentVec;
26119 if (Idx < (int)NumElts) {
26120 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26121 // shuffle mask to identify which vector is actually referenced.
26122 Idx = OtherSVN->getMaskElt(Idx);
26123 if (Idx < 0) {
26124 // Propagate Undef.
26125 Mask.push_back(Idx);
26126 continue;
26127 }
26128 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26129 : OtherSVN->getOperand(1);
26130 } else {
26131 // This shuffle index references an element within N1.
26132 CurrentVec = N1;
26133 }
26134
26135 // Simple case where 'CurrentVec' is UNDEF.
26136 if (CurrentVec.isUndef()) {
26137 Mask.push_back(-1);
26138 continue;
26139 }
26140
26141 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26142 // will be the first or second operand of the combined shuffle.
26143 Idx = Idx % NumElts;
26144 if (!SV0.getNode() || SV0 == CurrentVec) {
26145 // Ok. CurrentVec is the left hand side.
26146 // Update the mask accordingly.
26147 SV0 = CurrentVec;
26148 Mask.push_back(Idx);
26149 continue;
26150 }
26151 if (!SV1.getNode() || SV1 == CurrentVec) {
26152 // Ok. CurrentVec is the right hand side.
26153 // Update the mask accordingly.
26154 SV1 = CurrentVec;
26155 Mask.push_back(Idx + NumElts);
26156 continue;
26157 }
26158
26159 // Last chance - see if the vector is another shuffle and if it
26160 // uses one of the existing candidate shuffle ops.
26161 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26162 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26163 if (InnerIdx < 0) {
26164 Mask.push_back(-1);
26165 continue;
26166 }
26167 SDValue InnerVec = (InnerIdx < (int)NumElts)
26168 ? CurrentSVN->getOperand(0)
26169 : CurrentSVN->getOperand(1);
26170 if (InnerVec.isUndef()) {
26171 Mask.push_back(-1);
26172 continue;
26173 }
26174 InnerIdx %= NumElts;
26175 if (InnerVec == SV0) {
26176 Mask.push_back(InnerIdx);
26177 continue;
26178 }
26179 if (InnerVec == SV1) {
26180 Mask.push_back(InnerIdx + NumElts);
26181 continue;
26182 }
26183 }
26184
26185 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26186 return false;
26187 }
26188
26189 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26190 return true;
26191
26192 // Avoid introducing shuffles with illegal mask.
26193 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26194 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26195 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26196 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26197 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26198 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26199 if (TLI.isShuffleMaskLegal(Mask, VT))
26200 return true;
26201
26202 std::swap(SV0, SV1);
26203     ShuffleVectorSDNode::commuteMask(Mask);
26204     return TLI.isShuffleMaskLegal(Mask, VT);
26205 };
26206
26207 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26208 // Canonicalize shuffles according to rules:
26209 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26210 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26211 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26212     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26213         N->isOnlyUserOf(N1.getNode())) {
26214 // The incoming shuffle must be of the same type as the result of the
26215 // current shuffle.
26216 assert(N1->getOperand(0).getValueType() == VT &&
26217 "Shuffle types don't match");
26218
26219 SDValue SV0 = N1->getOperand(0);
26220 SDValue SV1 = N1->getOperand(1);
26221 bool HasSameOp0 = N0 == SV0;
26222 bool IsSV1Undef = SV1.isUndef();
26223 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26224 // Commute the operands of this shuffle so merging below will trigger.
26225 return DAG.getCommutedVectorShuffle(*SVN);
26226 }
26227
26228 // Canonicalize splat shuffles to the RHS to improve merging below.
26229 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26230 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26231 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26232 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26233 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26234 return DAG.getCommutedVectorShuffle(*SVN);
26235 }
26236
26237 // Try to fold according to rules:
26238 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26239 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26240 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26241 // Don't try to fold shuffles with illegal type.
26242 // Only fold if this shuffle is the only user of the other shuffle.
26243     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26244 for (int i = 0; i != 2; ++i) {
26245 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26246 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26247 // The incoming shuffle must be of the same type as the result of the
26248 // current shuffle.
26249 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26250 assert(OtherSV->getOperand(0).getValueType() == VT &&
26251 "Shuffle types don't match");
26252
26253       SDValue SV0, SV1;
26254       SmallVector<int, 4> Mask;
26255 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26256 SV0, SV1, Mask)) {
26257 // Check if all indices in Mask are Undef. In case, propagate Undef.
26258 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26259 return DAG.getUNDEF(VT);
26260
26261 return DAG.getVectorShuffle(VT, SDLoc(N),
26262 SV0 ? SV0 : DAG.getUNDEF(VT),
26263 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26264 }
26265 }
26266 }
26267
26268   // Merge shuffles through binops if we are able to merge them with at least
26269   // one other shuffle.
26270 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26271 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26272 unsigned SrcOpcode = N0.getOpcode();
26273 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26274 (N1.isUndef() ||
26275 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26276 // Get binop source ops, or just pass on the undef.
26277 SDValue Op00 = N0.getOperand(0);
26278 SDValue Op01 = N0.getOperand(1);
26279 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26280 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26281 // TODO: We might be able to relax the VT check but we don't currently
26282 // have any isBinOp() that has different result/ops VTs so play safe until
26283 // we have test coverage.
26284 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26285 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26286 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26287 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26288 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26289 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26290 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26291 SmallVectorImpl<int> &Mask, bool LeftOp,
26292 bool Commute) {
26293 SDValue InnerN = Commute ? N1 : N0;
26294 SDValue Op0 = LeftOp ? Op00 : Op01;
26295 SDValue Op1 = LeftOp ? Op10 : Op11;
26296 if (Commute)
26297 std::swap(Op0, Op1);
26298 // Only accept the merged shuffle if we don't introduce undef elements,
26299 // or the inner shuffle already contained undef elements.
26300 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26301 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26302 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26303 Mask) &&
26304 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26305 llvm::none_of(Mask, [](int M) { return M < 0; }));
26306 };
26307
26308 // Ensure we don't increase the number of shuffles - we must merge a
26309 // shuffle from at least one of the LHS and RHS ops.
26310 bool MergedLeft = false;
26311 SDValue LeftSV0, LeftSV1;
26312 SmallVector<int, 4> LeftMask;
26313 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26314 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26315 MergedLeft = true;
26316 } else {
26317 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26318 LeftSV0 = Op00, LeftSV1 = Op10;
26319 }
26320
26321 bool MergedRight = false;
26322 SDValue RightSV0, RightSV1;
26323 SmallVector<int, 4> RightMask;
26324 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26325 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26326 MergedRight = true;
26327 } else {
26328 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26329 RightSV0 = Op01, RightSV1 = Op11;
26330 }
26331
26332 if (MergedLeft || MergedRight) {
26333 SDLoc DL(N);
26334           SDValue LHS = DAG.getVectorShuffle(
26335               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26336 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26337           SDValue RHS = DAG.getVectorShuffle(
26338               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26339 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26340 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26341 }
26342 }
26343 }
26344 }
26345
26346 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26347 return V;
26348
26349 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26350 // Perform this really late, because it could eliminate knowledge
26351 // of undef elements created by this shuffle.
26352 if (Level < AfterLegalizeTypes)
26353 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26354 LegalOperations))
26355 return V;
26356
26357 return SDValue();
26358}
26359
26360SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26361 EVT VT = N->getValueType(0);
26362 if (!VT.isFixedLengthVector())
26363 return SDValue();
26364
26365 // Try to convert a scalar binop with an extracted vector element to a vector
26366 // binop. This is intended to reduce potentially expensive register moves.
26367 // TODO: Check if both operands are extracted.
26368   // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26369 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26370 SDValue Scalar = N->getOperand(0);
26371 unsigned Opcode = Scalar.getOpcode();
26372 EVT VecEltVT = VT.getScalarType();
26373 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26374 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26375 Scalar.getOperand(0).getValueType() == VecEltVT &&
26376 Scalar.getOperand(1).getValueType() == VecEltVT &&
26377 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26378 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26379 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26380 // Match an extract element and get a shuffle mask equivalent.
26381 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26382
26383 for (int i : {0, 1}) {
26384 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26385 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26386 SDValue EE = Scalar.getOperand(i);
26387 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26388 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26389 EE.getOperand(0).getValueType() == VT &&
26390 isa<ConstantSDNode>(EE.getOperand(1))) {
26391 // Mask = {ExtractIndex, undef, undef....}
26392 ShufMask[0] = EE.getConstantOperandVal(1);
26393 // Make sure the shuffle is legal if we are crossing lanes.
26394 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26395 SDLoc DL(N);
26396 SDValue V[] = {EE.getOperand(0),
26397 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26398 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26399 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26400 ShufMask);
26401 }
26402 }
26403 }
26404 }
26405
26406 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26407 // with a VECTOR_SHUFFLE and possible truncate.
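  // For example (an illustrative case):
  //   scalar_to_vector (extractelt V:v4i32, 2) --> shuffle V, undef, <2,u,u,u>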
26408 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26409 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26410 return SDValue();
26411
26412 // If we have an implicit truncate, truncate here if it is legal.
26413 if (VecEltVT != Scalar.getValueType() &&
26414 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26415 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26416 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26417 }
26418
26419 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26420 if (!ExtIndexC)
26421 return SDValue();
26422
26423 SDValue SrcVec = Scalar.getOperand(0);
26424 EVT SrcVT = SrcVec.getValueType();
26425 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26426 unsigned VTNumElts = VT.getVectorNumElements();
26427 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26428 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26429 SmallVector<int, 8> Mask(SrcNumElts, -1);
26430 Mask[0] = ExtIndexC->getZExtValue();
26431 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26432 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26433 if (!LegalShuffle)
26434 return SDValue();
26435
26436 // If the initial vector is the same size, the shuffle is the result.
26437 if (VT == SrcVT)
26438 return LegalShuffle;
26439
26440 // If not, shorten the shuffled vector.
26441 if (VTNumElts != SrcNumElts) {
26442 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26443 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26444 SrcVT.getVectorElementType(), VTNumElts);
26445 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26446 ZeroIdx);
26447 }
26448 }
26449
26450 return SDValue();
26451}
26452
26453SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26454 EVT VT = N->getValueType(0);
26455 SDValue N0 = N->getOperand(0);
26456 SDValue N1 = N->getOperand(1);
26457 SDValue N2 = N->getOperand(2);
26458 uint64_t InsIdx = N->getConstantOperandVal(2);
26459
26460 // If inserting an UNDEF, just return the original vector.
26461 if (N1.isUndef())
26462 return N0;
26463
26464 // If this is an insert of an extracted vector into an undef vector, we can
26465 // just use the input to the extract if the types match, and can simplify
26466 // in some cases even if they don't.
26467 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26468 N1.getOperand(1) == N2) {
26469 EVT SrcVT = N1.getOperand(0).getValueType();
26470 if (SrcVT == VT)
26471 return N1.getOperand(0);
26472 // TODO: To remove the zero check, need to adjust the offset to
26473 // a multiple of the new src type.
26474 if (isNullConstant(N2)) {
26475 if (VT.knownBitsGE(SrcVT) &&
26476 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
26477 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26478 VT, N0, N1.getOperand(0), N2);
26479 else if (VT.knownBitsLE(SrcVT) &&
26480 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
26481         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26482                            VT, N1.getOperand(0), N2);
26483 }
26484 }
26485
26486 // Handle case where we've ended up inserting back into the source vector
26487 // we extracted the subvector from.
26488 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26489 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26490 N1.getOperand(1) == N2)
26491 return N0;
26492
26493 // Simplify scalar inserts into an undef vector:
26494 // insert_subvector undef, (splat X), N2 -> splat X
26495 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26496 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26497 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26498
26499 // If we are inserting a bitcast value into an undef, with the same
26500 // number of elements, just use the bitcast input of the extract.
26501 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26502 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26503   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26504       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26505       N1.getOperand(0).getOperand(1) == N2 &&
26506       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26507           VT.getVectorElementCount() &&
26508       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26509           VT.getSizeInBits()) {
26510 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26511 }
26512
26513   // If both N0 and N1 are bitcast values on which insert_subvector
26514   // would make sense, pull the bitcast through.
26515 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26516 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26517 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26518 SDValue CN0 = N0.getOperand(0);
26519 SDValue CN1 = N1.getOperand(0);
26520 EVT CN0VT = CN0.getValueType();
26521 EVT CN1VT = CN1.getValueType();
26522 if (CN0VT.isVector() && CN1VT.isVector() &&
26523         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26524         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26525 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26526 CN0.getValueType(), CN0, CN1, N2);
26527 return DAG.getBitcast(VT, NewINSERT);
26528 }
26529 }
26530
26531 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26532 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26533 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26534 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26535 N0.getOperand(1).getValueType() == N1.getValueType() &&
26536 N0.getOperand(2) == N2)
26537 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26538 N1, N2);
26539
26540 // Eliminate an intermediate insert into an undef vector:
26541 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26542 // insert_subvector undef, X, 0
26543 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26544 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26545 isNullConstant(N2))
26546 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26547 N1.getOperand(1), N2);
26548
26549 // Push subvector bitcasts to the output, adjusting the index as we go.
26550 // insert_subvector(bitcast(v), bitcast(s), c1)
26551 // -> bitcast(insert_subvector(v, s, c2))
26552 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26553 N1.getOpcode() == ISD::BITCAST) {
26554 SDValue N0Src = peekThroughBitcasts(N0);
26555 SDValue N1Src = peekThroughBitcasts(N1);
26556 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26557 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26558 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26559 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26560 EVT NewVT;
26561 SDLoc DL(N);
26562 SDValue NewIdx;
26563 LLVMContext &Ctx = *DAG.getContext();
26564 ElementCount NumElts = VT.getVectorElementCount();
26565 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26566 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26567 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26568 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26569 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26570 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26571 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26572 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26573 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26574 NumElts.divideCoefficientBy(Scale));
26575 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26576 }
26577 }
26578 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26579 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26580 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26581 return DAG.getBitcast(VT, Res);
26582 }
26583 }
26584 }
26585
26586 // Canonicalize insert_subvector dag nodes.
26587 // Example:
26588 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26589 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26590 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26591 N1.getValueType() == N0.getOperand(1).getValueType()) {
26592 unsigned OtherIdx = N0.getConstantOperandVal(2);
26593 if (InsIdx < OtherIdx) {
26594 // Swap nodes.
26595 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26596 N0.getOperand(0), N1, N2);
26597 AddToWorklist(NewOp.getNode());
26598 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26599 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26600 }
26601 }
26602
26603 // If the input vector is a concatenation, and the insert replaces
26604 // one of the pieces, we can optimize into a single concat_vectors.
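  // For example (an illustrative case), with A, B, C, D and X of equal type:
  //   insert_subvector (concat_vectors A, B, C, D), X, 2*len(A)
  //     --> concat_vectors A, B, X, D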
26605 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26606 N0.getOperand(0).getValueType() == N1.getValueType() &&
26609 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26610 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26611 Ops[InsIdx / Factor] = N1;
26612 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26613 }
26614
26615 // Simplify source operands based on insertion.
26616   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26617     return SDValue(N, 0);
26618
26619 return SDValue();
26620}
26621
26622SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26623 SDValue N0 = N->getOperand(0);
26624
26625 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26626 if (N0->getOpcode() == ISD::FP16_TO_FP)
26627 return N0->getOperand(0);
26628
26629 return SDValue();
26630}
26631
26632SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26633 auto Op = N->getOpcode();
26634   assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
26635          "opcode should be FP16_TO_FP or BF16_TO_FP.");
26636 SDValue N0 = N->getOperand(0);
26637
26638 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26639 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26640 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26641     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26642     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26643 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26644 }
26645 }
26646
26647 // Sometimes constants manage to survive very late in the pipeline, e.g.,
26648 // because they are wrapped inside the <1 x f16> type. Try one last time to
26649 // get rid of them.
26650 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
26651 N->getValueType(0), {N0});
26652 return Folded;
26653}
26654
26655SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26656 SDValue N0 = N->getOperand(0);
26657
26658 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26659 if (N0->getOpcode() == ISD::BF16_TO_FP)
26660 return N0->getOperand(0);
26661
26662 return SDValue();
26663}
26664
26665SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26666 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26667 return visitFP16_TO_FP(N);
26668}
26669
26670SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26671 SDValue N0 = N->getOperand(0);
26672 EVT VT = N0.getValueType();
26673 unsigned Opcode = N->getOpcode();
26674
26675 // VECREDUCE over 1-element vector is just an extract.
26676 if (VT.getVectorElementCount().isScalar()) {
26677 SDLoc dl(N);
26678 SDValue Res =
26679         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26680                     DAG.getVectorIdxConstant(0, dl));
26681 if (Res.getValueType() != N->getValueType(0))
26682 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26683 return Res;
26684 }
26685
26686   // On a boolean vector, an and/or reduction is the same as a umin/umax
26687 // reduction. Convert them if the latter is legal while the former isn't.
26688 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26689     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26690                              ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26691 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26692         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26693         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26694 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26695 }
26696
26697 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26698 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26699 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26700 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26701 SDValue Vec = N0.getOperand(0);
26702 SDValue Subvec = N0.getOperand(1);
26703 if ((Opcode == ISD::VECREDUCE_OR &&
26704 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26705 (Opcode == ISD::VECREDUCE_AND &&
26706 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26707 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26708 }
26709
26710 return SDValue();
26711}
26712
26713SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26714 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26715
26716 // FSUB -> FMA combines:
26717 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26718 AddToWorklist(Fused.getNode());
26719 return Fused;
26720 }
26721 return SDValue();
26722}
26723
26724SDValue DAGCombiner::visitVPOp(SDNode *N) {
26725
26726 if (N->getOpcode() == ISD::VP_GATHER)
26727 if (SDValue SD = visitVPGATHER(N))
26728 return SD;
26729
26730 if (N->getOpcode() == ISD::VP_SCATTER)
26731 if (SDValue SD = visitVPSCATTER(N))
26732 return SD;
26733
26734 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26735 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26736 return SD;
26737
26738 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26739 if (SDValue SD = visitVP_STRIDED_STORE(N))
26740 return SD;
26741
26742 // VP operations in which all vector elements are disabled - either by
26743 // determining that the mask is all false or that the EVL is 0 - can be
26744 // eliminated.
26745 bool AreAllEltsDisabled = false;
26746 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26747 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26748 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26749 AreAllEltsDisabled |=
26750 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26751
26752 // This is the only generic VP combine we support for now.
26753 if (!AreAllEltsDisabled) {
26754 switch (N->getOpcode()) {
26755 case ISD::VP_FADD:
26756 return visitVP_FADD(N);
26757 case ISD::VP_FSUB:
26758 return visitVP_FSUB(N);
26759 case ISD::VP_FMA:
26760 return visitFMA<VPMatchContext>(N);
26761 case ISD::VP_SELECT:
26762 return visitVP_SELECT(N);
26763 case ISD::VP_MUL:
26764 return visitMUL<VPMatchContext>(N);
26765 default:
26766 break;
26767 }
26768 return SDValue();
26769 }
26770
26771 // Binary operations can be replaced by UNDEF.
26772 if (ISD::isVPBinaryOp(N->getOpcode()))
26773 return DAG.getUNDEF(N->getValueType(0));
26774
26775 // VP Memory operations can be replaced by either the chain (stores) or the
26776 // chain + undef (loads).
26777 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26778 if (MemSD->writeMem())
26779 return MemSD->getChain();
26780 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26781 }
26782
26783 // Reduction operations return the start operand when no elements are active.
26784 if (ISD::isVPReduction(N->getOpcode()))
26785 return N->getOperand(0);
26786
26787 return SDValue();
26788}
26789
26790SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26791 SDValue Chain = N->getOperand(0);
26792 SDValue Ptr = N->getOperand(1);
26793 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26794
26795 // Check if the memory, where FP state is written to, is used only in a single
26796 // load operation.
26797 LoadSDNode *LdNode = nullptr;
26798 for (auto *U : Ptr->uses()) {
26799 if (U == N)
26800 continue;
26801 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26802 if (LdNode && LdNode != Ld)
26803 return SDValue();
26804 LdNode = Ld;
26805 continue;
26806 }
26807 return SDValue();
26808 }
26809 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26810 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26811 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26812 return SDValue();
26813
26814 // Check if the loaded value is used only in a store operation.
26815 StoreSDNode *StNode = nullptr;
26816 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26817 SDUse &U = I.getUse();
26818 if (U.getResNo() == 0) {
26819 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26820 if (StNode)
26821 return SDValue();
26822 StNode = St;
26823 } else {
26824 return SDValue();
26825 }
26826 }
26827 }
26828 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26829 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26830 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26831 return SDValue();
26832
26833 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26834 // environment.
26835 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26836 StNode->getMemOperand());
26837 CombineTo(StNode, Res, false);
26838 return Res;
26839}
26840
26841SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26842 SDValue Chain = N->getOperand(0);
26843 SDValue Ptr = N->getOperand(1);
26844 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26845
26846 // Check if the address of FP state is used also in a store operation only.
26847 StoreSDNode *StNode = nullptr;
26848 for (auto *U : Ptr->uses()) {
26849 if (U == N)
26850 continue;
26851 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26852 if (StNode && StNode != St)
26853 return SDValue();
26854 StNode = St;
26855 continue;
26856 }
26857 return SDValue();
26858 }
26859 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26860 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26861 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26862 return SDValue();
26863
26864 // Check if the stored value is loaded from some location and the loaded
26865 // value is used only in the store operation.
26866 SDValue StValue = StNode->getValue();
26867 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26868 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26869 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26870 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26871 return SDValue();
26872
26873 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26874 // environment.
26875 SDValue Res =
26876 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26877 LdNode->getMemOperand());
26878 return Res;
26879}
26880
26881 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26882/// with the destination vector and a zero vector.
26883/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26884/// vector_shuffle V, Zero, <0, 4, 2, 4>
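/// Shuffle indices >= the element count select from the second operand, so
/// index 4 in the example above picks a lane of the zero vector, clearing
/// that element.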
26885SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26886 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26887
26888 EVT VT = N->getValueType(0);
26889 SDValue LHS = N->getOperand(0);
26890 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26891 SDLoc DL(N);
26892
26893 // Make sure we're not running after operation legalization where it
26894 // may have custom lowered the vector shuffles.
26895 if (LegalOperations)
26896 return SDValue();
26897
26898 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26899 return SDValue();
26900
26901 EVT RVT = RHS.getValueType();
26902 unsigned NumElts = RHS.getNumOperands();
26903
26904 // Attempt to create a valid clear mask, splitting the mask into
26905 // sub elements and checking to see if each is
26906 // all zeros or all ones - suitable for shuffle masking.
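// For example, with Split == 2 a v2i32 mask <0x0000FFFF, 0xFFFFFFFF> is viewed
// as four 16-bit lanes <0xFFFF, 0x0000, 0xFFFF, 0xFFFF> (little-endian), each
// either all ones (keep the lane) or all zeros (clear the lane).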
26907 auto BuildClearMask = [&](int Split) {
26908 int NumSubElts = NumElts * Split;
26909 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26910
26911 SmallVector<int, 8> Indices;
26912 for (int i = 0; i != NumSubElts; ++i) {
26913 int EltIdx = i / Split;
26914 int SubIdx = i % Split;
26915 SDValue Elt = RHS.getOperand(EltIdx);
26916 // X & undef --> 0 (not undef). So this lane must be converted to choose
26917 // from the zero constant vector (same as if the element had all 0-bits).
26918 if (Elt.isUndef()) {
26919 Indices.push_back(i + NumSubElts);
26920 continue;
26921 }
26922
26923 APInt Bits;
26924 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26925 Bits = Cst->getAPIntValue();
26926 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26927 Bits = CstFP->getValueAPF().bitcastToAPInt();
26928 else
26929 return SDValue();
26930
26931 // Extract the sub element from the constant bit mask.
26932 if (DAG.getDataLayout().isBigEndian())
26933 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26934 else
26935 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26936
26937 if (Bits.isAllOnes())
26938 Indices.push_back(i);
26939 else if (Bits == 0)
26940 Indices.push_back(i + NumSubElts);
26941 else
26942 return SDValue();
26943 }
26944
26945 // Let's see if the target supports this vector_shuffle.
26946 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26947 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26948 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26949 return SDValue();
26950
26951 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26952 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26953 DAG.getBitcast(ClearVT, LHS),
26954 Zero, Indices));
26955 };
26956
26957 // Determine maximum split level (byte level masking).
26958 int MaxSplit = 1;
26959 if (RVT.getScalarSizeInBits() % 8 == 0)
26960 MaxSplit = RVT.getScalarSizeInBits() / 8;
26961
26962 for (int Split = 1; Split <= MaxSplit; ++Split)
26963 if (RVT.getScalarSizeInBits() % Split == 0)
26964 if (SDValue S = BuildClearMask(Split))
26965 return S;
26966
26967 return SDValue();
26968}
26969
26970/// If a vector binop is performed on splat values, it may be profitable to
26971/// extract, scalarize, and insert/splat.
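/// For example, add (splat X), (splat Y) --> splat (add X, Y): one scalar add
/// plus a splat instead of a full-width vector add.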
26972 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26973 const SDLoc &DL) {
26974 SDValue N0 = N->getOperand(0);
26975 SDValue N1 = N->getOperand(1);
26976 unsigned Opcode = N->getOpcode();
26977 EVT VT = N->getValueType(0);
26978 EVT EltVT = VT.getVectorElementType();
26979 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26980
26981 // TODO: Remove/replace the extract cost check? If the elements are available
26982 // as scalars, then there may be no extract cost. Should we ask if
26983 // inserting a scalar back into a vector is cheap instead?
26984 int Index0, Index1;
26985 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26986 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26987 // Extract element from splat_vector should be free.
26988 // TODO: use DAG.isSplatValue instead?
26989 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26990 N1.getOpcode() == ISD::SPLAT_VECTOR;
26991 if (!Src0 || !Src1 || Index0 != Index1 ||
26992 Src0.getValueType().getVectorElementType() != EltVT ||
26993 Src1.getValueType().getVectorElementType() != EltVT ||
26994 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26995 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26996 return SDValue();
26997
26998 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26999 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27000 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27001 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27002
27003 // If all lanes but 1 are undefined, no need to splat the scalar result.
27004 // TODO: Keep track of undefs and use that info in the general case.
27005 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27006 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
27007 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
27008 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27009 // build_vec ..undef, (bo X, Y), undef...
27010 SmallVector<SDValue, 8> Ops(N0->ops());
27011 Ops[Index0] = ScalarBO;
27012 return DAG.getBuildVector(VT, DL, Ops);
27013 }
27014
27015 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27016 return DAG.getSplat(VT, DL, ScalarBO);
27017}
27018
27019/// Visit a vector cast operation, like FP_EXTEND.
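/// For example, fp_extend (splat X:f32) --> splat (fp_extend X to f64) when
/// the scalar cast is legal and the target prefers scalarizing the splat.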
27020SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27021 EVT VT = N->getValueType(0);
27022 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27023 EVT EltVT = VT.getVectorElementType();
27024 unsigned Opcode = N->getOpcode();
27025
27026 SDValue N0 = N->getOperand(0);
27027 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27028
27029 // TODO: promote operation might be also good here?
27030 int Index0;
27031 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27032 if (Src0 &&
27033 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27034 TLI.isExtractVecEltCheap(VT, Index0)) &&
27035 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27036 TLI.preferScalarizeSplat(N)) {
27037 EVT SrcVT = N0.getValueType();
27038 EVT SrcEltVT = SrcVT.getVectorElementType();
27039 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27040 SDValue Elt =
27041 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27042 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27043 if (VT.isScalableVector())
27044 return DAG.getSplatVector(VT, DL, ScalarBO);
27045 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27046 return DAG.getBuildVector(VT, DL, Ops);
27047 }
27048
27049 return SDValue();
27050}
27051
27052/// Visit a binary vector operation, like ADD.
27053SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27054 EVT VT = N->getValueType(0);
27055 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27056
27057 SDValue LHS = N->getOperand(0);
27058 SDValue RHS = N->getOperand(1);
27059 unsigned Opcode = N->getOpcode();
27060 SDNodeFlags Flags = N->getFlags();
27061
27062 // Move unary shuffles with identical masks after a vector binop:
27063 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27064 // --> shuffle (VBinOp A, B), Undef, Mask
27065 // This does not require type legality checks because we are creating the
27066 // same types of operations that are in the original sequence. We do have to
27067 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27068 // though. This code is adapted from the identical transform in instcombine.
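// For example:
//   add (shuffle A, undef, <1,1,3,3>), (shuffle B, undef, <1,1,3,3>)
//     --> shuffle (add A, B), undef, <1,1,3,3>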
27069 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27070 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27071 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27072 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27073 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27074 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27075 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27076 RHS.getOperand(0), Flags);
27077 SDValue UndefV = LHS.getOperand(1);
27078 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27079 }
27080
27081 // Try to sink a splat shuffle after a binop with a uniform constant.
27082 // This is limited to cases where neither the shuffle nor the constant have
27083 // undefined elements because that could be poison-unsafe or inhibit
27084 // demanded elements analysis. It is further limited to not change a splat
27085 // of an inserted scalar because that may be optimized better by
27086 // load-folding or other target-specific behaviors.
27087 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27088 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27089 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27090 // binop (splat X), (splat C) --> splat (binop X, C)
27091 SDValue X = Shuf0->getOperand(0);
27092 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27093 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27094 Shuf0->getMask());
27095 }
27096 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27097 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27098 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27099 // binop (splat C), (splat X) --> splat (binop C, X)
27100 SDValue X = Shuf1->getOperand(0);
27101 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27102 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27103 Shuf1->getMask());
27104 }
27105 }
27106
27107 // The following pattern is likely to emerge with vector reduction ops. Moving
27108 // the binary operation ahead of insertion may allow using a narrower vector
27109 // instruction that has better performance than the wide version of the op:
27110 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27111 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27112 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27113 LHS.getOperand(2) == RHS.getOperand(2) &&
27114 (LHS.hasOneUse() || RHS.hasOneUse())) {
27115 SDValue X = LHS.getOperand(1);
27116 SDValue Y = RHS.getOperand(1);
27117 SDValue Z = LHS.getOperand(2);
27118 EVT NarrowVT = X.getValueType();
27119 if (NarrowVT == Y.getValueType() &&
27120 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27121 LegalOperations)) {
27122 // (binop undef, undef) may not return undef, so compute that result.
27123 SDValue VecC =
27124 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27125 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27126 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27127 }
27128 }
27129
27130 // Make sure all but the first op are undef or constant.
27131 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27132 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27133 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27134 return Op.isUndef() ||
27135 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27136 });
27137 };
27138
27139 // The following pattern is likely to emerge with vector reduction ops. Moving
27140 // the binary operation ahead of the concat may allow using a narrower vector
27141 // instruction that has better performance than the wide version of the op:
27142 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27143 // concat (VBinOp X, Y), VecC
27144 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27145 (LHS.hasOneUse() || RHS.hasOneUse())) {
27146 EVT NarrowVT = LHS.getOperand(0).getValueType();
27147 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27148 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27149 unsigned NumOperands = LHS.getNumOperands();
27150 SmallVector<SDValue, 4> ConcatOps;
27151 for (unsigned i = 0; i != NumOperands; ++i) {
27152 // This constant folds for operands 1 and up.
27153 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27154 RHS.getOperand(i)));
27155 }
27156
27157 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27158 }
27159 }
27160
27161 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27162 return V;
27163
27164 return SDValue();
27165}
27166
27167SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27168 SDValue N2) {
27169 assert(N0.getOpcode() == ISD::SETCC &&
27170 "First argument must be a SetCC node!");
27171
27172 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27173 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27174
27175 // If we got a simplified select_cc node back from SimplifySelectCC, then
27176 // break it down into a new SETCC node, and a new SELECT node, and then return
27177 // the SELECT node, since we were called with a SELECT node.
27178 if (SCC.getNode()) {
27179 // Check to see if we got a select_cc back (to turn into setcc/select).
27180 // Otherwise, just return whatever node we got back, like fabs.
27181 if (SCC.getOpcode() == ISD::SELECT_CC) {
27182 const SDNodeFlags Flags = N0->getFlags();
27183 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27184 N0.getValueType(),
27185 SCC.getOperand(0), SCC.getOperand(1),
27186 SCC.getOperand(4), Flags);
27187 AddToWorklist(SETCC.getNode());
27188 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27189 SCC.getOperand(2), SCC.getOperand(3));
27190 SelectNode->setFlags(Flags);
27191 return SelectNode;
27192 }
27193
27194 return SCC;
27195 }
27196 return SDValue();
27197}
27198
27199/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27200/// being selected between, see if we can simplify the select. Callers of this
27201/// should assume that TheSelect is deleted if this returns true. As such, they
27202/// should return the appropriate thing (e.g. the node) back to the top-level of
27203/// the DAG combiner loop to avoid it being looked at.
27204bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27205 SDValue RHS) {
27206 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27207 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
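// That is, when x >= 0 the select picks fsqrt(x), and when x < 0 fsqrt(x) is
// NaN anyway, so the whole select collapses to fsqrt(x).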
27208 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27209 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27210 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27211 SDValue Sqrt = RHS;
27212 ISD::CondCode CC;
27213 SDValue CmpLHS;
27214 const ConstantFPSDNode *Zero = nullptr;
27215
27216 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27217 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27218 CmpLHS = TheSelect->getOperand(0);
27219 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27220 } else {
27221 // SELECT or VSELECT
27222 SDValue Cmp = TheSelect->getOperand(0);
27223 if (Cmp.getOpcode() == ISD::SETCC) {
27224 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27225 CmpLHS = Cmp.getOperand(0);
27226 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27227 }
27228 }
27229 if (Zero && Zero->isZero() &&
27230 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27231 CC == ISD::SETULT || CC == ISD::SETLT)) {
27232 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27233 CombineTo(TheSelect, Sqrt);
27234 return true;
27235 }
27236 }
27237 }
27238 // Cannot simplify select with vector condition
27239 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27240
27241 // If this is a select from two identical things, try to pull the operation
27242 // through the select.
27243 if (LHS.getOpcode() != RHS.getOpcode() ||
27244 !LHS.hasOneUse() || !RHS.hasOneUse())
27245 return false;
27246
27247 // If this is a load and the token chain is identical, replace the select
27248 // of two loads with a load through a select of the address to load from.
27249 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27250 // constants have been dropped into the constant pool.
27251 if (LHS.getOpcode() == ISD::LOAD) {
27252 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27253 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27254
27255 // Token chains must be identical.
27256 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27257 // Do not let this transformation reduce the number of volatile loads.
27258 // Be conservative for atomics for the moment
27259 // TODO: This does appear to be legal for unordered atomics (see D66309)
27260 !LLD->isSimple() || !RLD->isSimple() ||
27261 // FIXME: If either is a pre/post inc/dec load,
27262 // we'd need to split out the address adjustment.
27263 LLD->isIndexed() || RLD->isIndexed() ||
27264 // If this is an EXTLOAD, the VT's must match.
27265 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27266 // If this is an EXTLOAD, the kind of extension must match.
27267 (LLD->getExtensionType() != RLD->getExtensionType() &&
27268 // The only exception is if one of the extensions is anyext.
27269 LLD->getExtensionType() != ISD::EXTLOAD &&
27270 RLD->getExtensionType() != ISD::EXTLOAD) ||
27271 // FIXME: this discards src value information. This is
27272 // over-conservative. It would be beneficial to be able to remember
27273 // both potential memory locations. Since we are discarding
27274 // src value info, don't do the transformation if the memory
27275 // locations are not in the default address space.
27276 LLD->getPointerInfo().getAddrSpace() != 0 ||
27277 RLD->getPointerInfo().getAddrSpace() != 0 ||
27278 // We can't produce a CMOV of a TargetFrameIndex since we won't
27279 // generate the address generation required.
27280 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27281 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27282 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27283 LLD->getBasePtr().getValueType()))
27284 return false;
27285
27286 // The loads must not depend on one another.
27287 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27288 return false;
27289
27290 // Check that the select condition doesn't reach either load. If so,
27291 // folding this will induce a cycle into the DAG. If not, this is safe to
27292 // xform, so create a select of the addresses.
27293
27294 SmallPtrSet<const SDNode *, 32> Visited;
27295 SmallVector<const SDNode *, 16> Worklist;
27296
27297 // Always fail if LLD and RLD are not independent. TheSelect is a
27298 // predecessor to all Nodes in question so we need not search past it.
27299
27300 Visited.insert(TheSelect);
27301 Worklist.push_back(LLD);
27302 Worklist.push_back(RLD);
27303
27304 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27305 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27306 return false;
27307
27308 SDValue Addr;
27309 if (TheSelect->getOpcode() == ISD::SELECT) {
27310 // We cannot do this optimization if any pair of {RLD, LLD} is a
27311 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27312 // Loads, we only need to check if CondNode is a successor to one of the
27313 // loads. We can further avoid this if there's no use of their chain
27314 // value.
27315 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27316 Worklist.push_back(CondNode);
27317
27318 if ((LLD->hasAnyUseOfValue(1) &&
27319 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27320 (RLD->hasAnyUseOfValue(1) &&
27321 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27322 return false;
27323
27324 Addr = DAG.getSelect(SDLoc(TheSelect),
27325 LLD->getBasePtr().getValueType(),
27326 TheSelect->getOperand(0), LLD->getBasePtr(),
27327 RLD->getBasePtr());
27328 } else { // Otherwise SELECT_CC
27329 // We cannot do this optimization if any pair of {RLD, LLD} is a
27330 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27331 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27332 // one of the loads. We can further avoid this if there's no use of their
27333 // chain value.
27334
27335 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27336 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27337 Worklist.push_back(CondLHS);
27338 Worklist.push_back(CondRHS);
27339
27340 if ((LLD->hasAnyUseOfValue(1) &&
27341 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27342 (RLD->hasAnyUseOfValue(1) &&
27343 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27344 return false;
27345
27346 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27347 LLD->getBasePtr().getValueType(),
27348 TheSelect->getOperand(0),
27349 TheSelect->getOperand(1),
27350 LLD->getBasePtr(), RLD->getBasePtr(),
27351 TheSelect->getOperand(4));
27352 }
27353
27354 SDValue Load;
27355 // It is safe to replace the two loads if they have different alignments,
27356 // but the new load must be the minimum (most restrictive) alignment of the
27357 // inputs.
27358 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27359 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27360 if (!RLD->isInvariant())
27361 MMOFlags &= ~MachineMemOperand::MOInvariant;
27362 if (!RLD->isDereferenceable())
27363 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27364 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27365 // FIXME: Discards pointer and AA info.
27366 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27367 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27368 MMOFlags);
27369 } else {
27370 // FIXME: Discards pointer and AA info.
27371 Load = DAG.getExtLoad(
27372 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27373 : LLD->getExtensionType(),
27374 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27375 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27376 }
27377
27378 // Users of the select now use the result of the load.
27379 CombineTo(TheSelect, Load);
27380
27381 // Users of the old loads now use the new load's chain. We know the
27382 // old-load value is dead now.
27383 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27384 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27385 return true;
27386 }
27387
27388 return false;
27389}
27390
27391/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27392/// bitwise 'and'.
27393SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27394 SDValue N1, SDValue N2, SDValue N3,
27395 ISD::CondCode CC) {
27396 // If this is a select where the false operand is zero and the compare is a
27397 // check of the sign bit, see if we can perform the "gzip trick":
27398 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27399 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
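// For example, on i32: (sra X, 31) is all-ones exactly when X is negative and
// zero otherwise, so AND-ing it with A yields A or 0, which is the select
// result.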
27400 EVT XType = N0.getValueType();
27401 EVT AType = N2.getValueType();
27402 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27403 return SDValue();
27404
27405 // If the comparison is testing for a positive value, we have to invert
27406 // the sign bit mask, so only do that transform if the target has a bitwise
27407 // 'and not' instruction (the invert is free).
27408 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27409 // (X > -1) ? A : 0
27410 // (X > 0) ? X : 0 <-- This is canonical signed max.
27411 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27412 return SDValue();
27413 } else if (CC == ISD::SETLT) {
27414 // (X < 0) ? A : 0
27415 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27416 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27417 return SDValue();
27418 } else {
27419 return SDValue();
27420 }
27421
27422 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27423 // constant.
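// For example, with A == 16 (bit 4) on i32: ShCt = 32 - 4 - 1 = 27, and
// (srl X, 27) & 16 moves the sign bit (bit 31) down to bit 4, so the AND
// produces A (16) when X is negative and 0 otherwise (the setlt case).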
27424 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27425 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27426 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27427 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27428 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27429 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27430 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27431 AddToWorklist(Shift.getNode());
27432
27433 if (XType.bitsGT(AType)) {
27434 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27435 AddToWorklist(Shift.getNode());
27436 }
27437
27438 if (CC == ISD::SETGT)
27439 Shift = DAG.getNOT(DL, Shift, AType);
27440
27441 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27442 }
27443 }
27444
27445 unsigned ShCt = XType.getSizeInBits() - 1;
27446 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27447 return SDValue();
27448
27449 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27450 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27451 AddToWorklist(Shift.getNode());
27452
27453 if (XType.bitsGT(AType)) {
27454 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27455 AddToWorklist(Shift.getNode());
27456 }
27457
27458 if (CC == ISD::SETGT)
27459 Shift = DAG.getNOT(DL, Shift, AType);
27460
27461 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27462}
27463
27464// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27465SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27466 SDValue N0 = N->getOperand(0);
27467 SDValue N1 = N->getOperand(1);
27468 SDValue N2 = N->getOperand(2);
27469 SDLoc DL(N);
27470
27471 unsigned BinOpc = N1.getOpcode();
27472 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27473 (N1.getResNo() != N2.getResNo()))
27474 return SDValue();
27475
27476 // The use checks are intentionally on SDNode because we may be dealing
27477 // with opcodes that produce more than one SDValue.
27478 // TODO: Do we really need to check N0 (the condition operand of the select)?
27479 // But removing that clause could cause an infinite loop...
27480 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27481 return SDValue();
27482
27483 // Binops may include opcodes that return multiple values, so all values
27484 // must be created/propagated from the newly created binops below.
27485 SDVTList OpVTs = N1->getVTList();
27486
27487 // Fold select(cond, binop(x, y), binop(z, y))
27488 // --> binop(select(cond, x, z), y)
27489 if (N1.getOperand(1) == N2.getOperand(1)) {
27490 SDValue N10 = N1.getOperand(0);
27491 SDValue N20 = N2.getOperand(0);
27492 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27493 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27494 NewBinOp->setFlags(N1->getFlags());
27495 NewBinOp->intersectFlagsWith(N2->getFlags());
27496 return SDValue(NewBinOp.getNode(), N1.getResNo());
27497 }
27498
27499 // Fold select(cond, binop(x, y), binop(x, z))
27500 // --> binop(x, select(cond, y, z))
27501 if (N1.getOperand(0) == N2.getOperand(0)) {
27502 SDValue N11 = N1.getOperand(1);
27503 SDValue N21 = N2.getOperand(1);
27504 // Second op VT might be different (e.g. shift amount type)
27505 if (N11.getValueType() == N21.getValueType()) {
27506 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27507 SDValue NewBinOp =
27508 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27509 NewBinOp->setFlags(N1->getFlags());
27510 NewBinOp->intersectFlagsWith(N2->getFlags());
27511 return SDValue(NewBinOp.getNode(), N1.getResNo());
27512 }
27513 }
27514
27515 // TODO: Handle isCommutativeBinOp patterns as well?
27516 return SDValue();
27517}
27518
27519// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27520SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27521 SDValue N0 = N->getOperand(0);
27522 EVT VT = N->getValueType(0);
27523 bool IsFabs = N->getOpcode() == ISD::FABS;
27524 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27525
27526 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27527 return SDValue();
27528
27529 SDValue Int = N0.getOperand(0);
27530 EVT IntVT = Int.getValueType();
27531
27532 // The operand to cast should be integer.
27533 if (!IntVT.isInteger() || IntVT.isVector())
27534 return SDValue();
27535
27536 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27537 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
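// For example, for a scalar f32 the sign mask is 0x80000000: fneg becomes an
// integer xor with it and fabs becomes an and with 0x7FFFFFFF.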
27538 APInt SignMask;
27539 if (N0.getValueType().isVector()) {
27540 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27541 // 0x7f...) per element and splat it.
27542 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27543 if (IsFabs)
27544 SignMask = ~SignMask;
27545 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27546 } else {
27547 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27548 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27549 if (IsFabs)
27550 SignMask = ~SignMask;
27551 }
27552 SDLoc DL(N0);
27553 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27554 DAG.getConstant(SignMask, DL, IntVT));
27555 AddToWorklist(Int.getNode());
27556 return DAG.getBitcast(VT, Int);
27557}
27558
27559/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
27560/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27561/// in it. This may be a win when the constant is not otherwise available
27562/// because it replaces two constant pool loads with one.
27563SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27564 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27565 ISD::CondCode CC) {
27566 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27567 return SDValue();
27568
27569 // If we are before legalize types, we want the other legalization to happen
27570 // first (for example, to avoid messing with soft float).
27571 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27572 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27573 EVT VT = N2.getValueType();
27574 if (!TV || !FV || !TLI.isTypeLegal(VT))
27575 return SDValue();
27576
27577 // If a constant can be materialized without loads, this does not make sense.
27578 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27579 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27580 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27581 return SDValue();
27582
27583 // If both constants have multiple uses, then we won't need to do an extra
27584 // load. The values are likely around in registers for other users.
27585 if (!TV->hasOneUse() && !FV->hasOneUse())
27586 return SDValue();
27587
27588 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27589 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27590 Type *FPTy = Elts[0]->getType();
27591 const DataLayout &TD = DAG.getDataLayout();
27592
27593 // Create a ConstantArray of the two constants.
27594 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27595 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27596 TD.getPrefTypeAlign(FPTy));
27597 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27598
27599 // Get offsets to the 0 and 1 elements of the array, so we can select between
27600 // them.
27601 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27602 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27603 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27604 SDValue Cond =
27605 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27606 AddToWorklist(Cond.getNode());
27607 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27608 AddToWorklist(CstOffset.getNode());
27609 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27610 AddToWorklist(CPIdx.getNode());
27611 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27612 MachinePointerInfo::getConstantPool(
27613 DAG.getMachineFunction()), Alignment);
27614}
27615
27616/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27617/// where 'cond' is the comparison specified by CC.
27618SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27619 SDValue N2, SDValue N3, ISD::CondCode CC,
27620 bool NotExtCompare) {
27621 // (x ? y : y) -> y.
27622 if (N2 == N3) return N2;
27623
27624 EVT CmpOpVT = N0.getValueType();
27625 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27626 EVT VT = N2.getValueType();
27627 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27628 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27629 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27630
27631 // Determine if the condition we're dealing with is constant.
27632 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27633 AddToWorklist(SCC.getNode());
27634 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27635 // fold select_cc true, x, y -> x
27636 // fold select_cc false, x, y -> y
27637 return !(SCCC->isZero()) ? N2 : N3;
27638 }
27639 }
27640
27641 if (SDValue V =
27642 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27643 return V;
27644
27645 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27646 return V;
27647
27648 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27649 // where y has a single bit set.
27650 // A plaintext description would be, we can turn the SELECT_CC into an AND
27651 // when the condition can be materialized as an all-ones register. Any
27652 // single bit-test can be materialized as an all-ones register with
27653 // shift-left and shift-right-arith.
27654 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27655 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27656 SDValue AndLHS = N0->getOperand(0);
27657 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27658 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27659 // Shift the tested bit over the sign bit.
27660 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27661 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27662 unsigned ShCt = AndMask.getBitWidth() - 1;
27663 SDValue ShlAmt =
27664 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27665 getShiftAmountTy(AndLHS.getValueType()));
27666 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27667
27668 // Now arithmetic right shift it all the way over, so the result is
27669 // either all-ones, or zero.
27670 SDValue ShrAmt =
27671 DAG.getConstant(ShCt, SDLoc(Shl),
27672 getShiftAmountTy(Shl.getValueType()));
27673 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27674
27675 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27676 }
27677 }
27678 }
27679
27680 // fold select C, 16, 0 -> shl C, 4
27681 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27682 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27683
27684 if ((Fold || Swap) &&
27685 TLI.getBooleanContents(CmpOpVT) ==
27686 TargetLowering::ZeroOrOneBooleanContent &&
27687 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27688
27689 if (Swap) {
27690 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27691 std::swap(N2C, N3C);
27692 }
27693
27694 // If the caller doesn't want us to simplify this into a zext of a compare,
27695 // don't do it.
27696 if (NotExtCompare && N2C->isOne())
27697 return SDValue();
27698
27699 SDValue Temp, SCC;
27700 // zext (setcc n0, n1)
27701 if (LegalTypes) {
27702 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27703 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27704 } else {
27705 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27706 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27707 }
27708
27709 AddToWorklist(SCC.getNode());
27710 AddToWorklist(Temp.getNode());
27711
27712 if (N2C->isOne())
27713 return Temp;
27714
27715 unsigned ShCt = N2C->getAPIntValue().logBase2();
27716 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27717 return SDValue();
27718
27719 // shl setcc result by log2 n2c
27720 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27721 DAG.getConstant(ShCt, SDLoc(Temp),
27722 getShiftAmountTy(Temp.getValueType())));
27723 }
27724
27725 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27726 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27727 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27728 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27729 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27730 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27731 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27732 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27733 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27734 SDValue ValueOnZero = N2;
27735 SDValue Count = N3;
27736 // If the condition is NE instead of E, swap the operands.
27737 if (CC == ISD::SETNE)
27738 std::swap(ValueOnZero, Count);
27739 // Check if the value on zero is a constant equal to the bits in the type.
27740 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27741 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27742 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27743 // legal, combine to just cttz.
27744 if ((Count.getOpcode() == ISD::CTTZ ||
27745 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27746 N0 == Count.getOperand(0) &&
27747 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27748 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27749 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27750 // legal, combine to just ctlz.
27751 if ((Count.getOpcode() == ISD::CTLZ ||
27752 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27753 N0 == Count.getOperand(0) &&
27754 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27755 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27756 }
27757 }
27758 }
27759
27760 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27761 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
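// For example, on i32 (ashr X, 31) is 0 when X >= 0 and -1 when X < 0, so
// xor-ing it with C yields C or ~C, matching the two select arms.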
27762 if (!NotExtCompare && N1C && N2C && N3C &&
27763 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27764 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27765 (N1C->isZero() && CC == ISD::SETLT)) &&
27766 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27767 SDValue ASR = DAG.getNode(
27768 ISD::SRA, DL, CmpOpVT, N0,
27769 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27770 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27771 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27772 }
27773
27774 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27775 return S;
27776 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27777 return S;
27778
27779 return SDValue();
27780}
27781
27782/// This is a stub for TargetLowering::SimplifySetCC.
27783SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27784 ISD::CondCode Cond, const SDLoc &DL,
27785 bool foldBooleans) {
27786 TargetLowering::DAGCombinerInfo
27787 DagCombineInfo(DAG, Level, false, this);
27788 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27789}
27790
27791/// Given an ISD::SDIV node expressing a divide by constant, return
27792/// a DAG expression to select that will generate the same value by multiplying
27793/// by a magic number.
27794/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
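/// For example, an i32 signed divide by 3 becomes roughly mulhs(X, 0x55555556)
/// followed by adding the logically shifted sign bit, avoiding a hardware
/// division.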
27795SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27796 // when optimising for minimum size, we don't want to expand a div to a mul
27797 // and a shift.
27798 if (DAG.getMachineFunction().getFunction().hasMinSize())
27799 return SDValue();
27800
27801 SmallVector<SDNode *, 8> Built;
27802 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27803 for (SDNode *N : Built)
27804 AddToWorklist(N);
27805 return S;
27806 }
27807
27808 return SDValue();
27809}
27810
27811/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27812/// DAG expression that will generate the same value by right shifting.
27813SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27814 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27815 if (!C)
27816 return SDValue();
27817
27818 // Avoid division by zero.
27819 if (C->isZero())
27820 return SDValue();
27821
27822 SmallVector<SDNode *, 8> Built;
27823 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27824 for (SDNode *N : Built)
27825 AddToWorklist(N);
27826 return S;
27827 }
27828
27829 return SDValue();
27830}
27831
27832/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27833/// expression that will generate the same value by multiplying by a magic
27834/// number.
27835/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
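/// For example, an i32 unsigned divide by 3 becomes roughly
/// mulhu(X, 0xAAAAAAAB) followed by a logical shift right by 1.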
27836SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27837 // when optimising for minimum size, we don't want to expand a div to a mul
27838 // and a shift.
27839 if (DAG.getMachineFunction().getFunction().hasMinSize())
27840 return SDValue();
27841
27842 SmallVector<SDNode *, 8> Built;
27843 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27844 for (SDNode *N : Built)
27845 AddToWorklist(N);
27846 return S;
27847 }
27848
27849 return SDValue();
27850}
27851
27852/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27853/// return a DAG expression that will generate the same value.
27854SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27855 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27856 if (!C)
27857 return SDValue();
27858
27859 // Avoid division by zero.
27860 if (C->isZero())
27861 return SDValue();
27862
27863 SmallVector<SDNode *, 8> Built;
27864 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27865 for (SDNode *N : Built)
27866 AddToWorklist(N);
27867 return S;
27868 }
27869
27870 return SDValue();
27871}
27872
27873// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27874//
27875// Returns the node that represents `Log2(Op)`. This may create a new node. If
27876 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27877//
27878// All nodes will be created at `DL` and the output will be of type `VT`.
27879//
27880// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27881// `AssumeNonZero` if this function should simply assume (not require proving
27882// `Op` is non-zero).
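// For example, log2 of the constant splat <16, 16, 16, 16> is the splat
// <4, 4, 4, 4>, and log2(1 << Y) folds to Y.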
27883 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27884 SDValue Op, unsigned Depth,
27885 bool AssumeNonZero) {
27886 assert(VT.isInteger() && "Only integer types are supported!");
27887
27888 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27889 while (true) {
27890 switch (V.getOpcode()) {
27891 case ISD::TRUNCATE:
27892 case ISD::ZERO_EXTEND:
27893 V = V.getOperand(0);
27894 break;
27895 default:
27896 return V;
27897 }
27898 }
27899 };
27900
27901 if (VT.isScalableVector())
27902 return SDValue();
27903
27904 Op = PeekThroughCastsAndTrunc(Op);
27905
27906 // Helper for determining whether a value is a power-2 constant scalar or a
27907 // vector of such elements.
27908 SmallVector<APInt> Pow2Constants;
27909 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27910 if (C->isZero() || C->isOpaque())
27911 return false;
27912 // TODO: We may also be able to support negative powers of 2 here.
27913 if (C->getAPIntValue().isPowerOf2()) {
27914 Pow2Constants.emplace_back(C->getAPIntValue());
27915 return true;
27916 }
27917 return false;
27918 };
27919
27920 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27921 if (!VT.isVector())
27922 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27923 // We need to create a build vector
27924 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
27925 return DAG.getSplat(VT, DL,
27926 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
27927 VT.getScalarType()));
27928 SmallVector<SDValue> Log2Ops;
27929 for (const APInt &Pow2 : Pow2Constants)
27930 Log2Ops.emplace_back(
27931 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27932 return DAG.getBuildVector(VT, DL, Log2Ops);
27933 }
27934
27935 if (Depth >= DAG.MaxRecursionDepth)
27936 return SDValue();
27937
27938 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27939 ToCast = PeekThroughCastsAndTrunc(ToCast);
27940 EVT CurVT = ToCast.getValueType();
27941 if (NewVT == CurVT)
27942 return ToCast;
27943
27944 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27945 return DAG.getBitcast(NewVT, ToCast);
27946
27947 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27948 };
27949
27950 // log2(X << Y) -> log2(X) + Y
27951 if (Op.getOpcode() == ISD::SHL) {
27952 // 1 << Y and X nuw/nsw << Y are all non-zero.
27953 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27954 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27955 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27956 Depth + 1, AssumeNonZero))
27957 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27958 CastToVT(VT, Op.getOperand(1)));
27959 }
27960
27961 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27962 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27963 Op.hasOneUse()) {
27964 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27965 Depth + 1, AssumeNonZero))
27966 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27967 Depth + 1, AssumeNonZero))
27968 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27969 }
27970
27971 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27972 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27973 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27974 Op.hasOneUse()) {
27975 // Use AssumeNonZero as false here. Otherwise we can hit case where
27976 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because overflow).
27977 if (SDValue LogX =
27978 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27979 /*AssumeNonZero*/ false))
27980 if (SDValue LogY =
27981 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27982 /*AssumeNonZero*/ false))
27983 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27984 }
27985
27986 return SDValue();
27987}
27988
27989/// Determines the LogBase2 value for a non-null input value using the
27990/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
27991SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27992 bool KnownNonZero, bool InexpensiveOnly,
27993 std::optional<EVT> OutVT) {
27994 EVT VT = OutVT ? *OutVT : V.getValueType();
27995 SDValue InexpensiveLogBase2 =
27996 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27997 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27998 return InexpensiveLogBase2;
27999
28000 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28001 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28002 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28003 return LogBase2;
28004}
28005
28006/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28007/// For the reciprocal, we need to find the zero of the function:
28008/// F(X) = 1/X - A [which has a zero at X = 1/A]
28009/// =>
28010/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28011/// does not require additional intermediate precision]
28012/// For the last iteration, put numerator N into it to gain more precision:
28013/// Result = N X_i + X_i (N - N A X_i)
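/// Each iteration roughly doubles the number of correct mantissa bits, so a
/// typical hardware estimate of about 12 bits needs around one refinement
/// step for f32 and three for f64.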
28014SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28015 SDNodeFlags Flags) {
28016 if (LegalDAG)
28017 return SDValue();
28018
28019 // TODO: Handle extended types?
28020 EVT VT = Op.getValueType();
28021 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28022 VT.getScalarType() != MVT::f64)
28023 return SDValue();
28024
28025 // If estimates are explicitly disabled for this function, we're done.
28026 MachineFunction &MF = DAG.getMachineFunction();
28027 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28028 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28029 return SDValue();
28030
28031 // Estimates may be explicitly enabled for this type with a custom number of
28032 // refinement steps.
28033 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28034 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28035 AddToWorklist(Est.getNode());
28036
28037 SDLoc DL(Op);
28038 if (Iterations) {
28039 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28040
28041 // Newton iterations: Est = Est + Est (N - Arg * Est)
28042 // If this is the last iteration, also multiply by the numerator.
28043 for (int i = 0; i < Iterations; ++i) {
28044 SDValue MulEst = Est;
28045
28046 if (i == Iterations - 1) {
28047 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28048 AddToWorklist(MulEst.getNode());
28049 }
28050
28051 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28052 AddToWorklist(NewEst.getNode());
28053
28054 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28055 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28056 AddToWorklist(NewEst.getNode());
28057
28058 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28059 AddToWorklist(NewEst.getNode());
28060
28061 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28062 AddToWorklist(Est.getNode());
28063 }
28064 } else {
28065 // If no iterations are available, multiply with N.
28066 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28067 AddToWorklist(Est.getNode());
28068 }
28069
28070 return Est;
28071 }
28072
28073 return SDValue();
28074}
28075
28076/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28077/// For the reciprocal sqrt, we need to find the zero of the function:
28078/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28079/// =>
28080/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28081/// As a result, we precompute A/2 prior to the iteration loop.
28082SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28083 unsigned Iterations,
28084 SDNodeFlags Flags, bool Reciprocal) {
28085 EVT VT = Arg.getValueType();
28086 SDLoc DL(Arg);
28087 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28088
28089 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28090 // this entire sequence requires only one FP constant.
28091 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28092 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28093
28094 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28095 for (unsigned i = 0; i < Iterations; ++i) {
28096 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28097 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28098 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28099 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28100 }
28101
28102 // If non-reciprocal square root is requested, multiply the result by Arg.
28103 if (!Reciprocal)
28104 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28105
28106 return Est;
28107}
28108
28109/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28110/// For the reciprocal sqrt, we need to find the zero of the function:
28111/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28112/// =>
28113/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
28114SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28115 unsigned Iterations,
28116 SDNodeFlags Flags, bool Reciprocal) {
28117 EVT VT = Arg.getValueType();
28118 SDLoc DL(Arg);
28119 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28120 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28121
28122 // This routine must enter the loop below to work correctly
28123 // when (Reciprocal == false).
28124 assert(Iterations > 0);
28125
28126 // Newton iterations for reciprocal square root:
28127 // E = (E * -0.5) * ((A * E) * E + -3.0)
28128 for (unsigned i = 0; i < Iterations; ++i) {
28129 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28130 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28131 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28132
28133 // When calculating a square root at the last iteration build:
28134 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28135 // (notice a common subexpression)
28136 SDValue LHS;
28137 if (Reciprocal || (i + 1) < Iterations) {
28138 // RSQRT: LHS = (E * -0.5)
28139 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28140 } else {
28141 // SQRT: LHS = (A * E) * -0.5
28142 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28143 }
28144
28145 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28146 }
28147
28148 return Est;
28149}
28150
28151/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28152/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28153/// Op can be zero.
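/// In particular sqrt(0) should be 0, but 0 * rsqrt(0) is 0 * inf = NaN, so
/// the estimate is replaced via a select when the input is zero or denormal.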
28154SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28155 bool Reciprocal) {
28156 if (LegalDAG)
28157 return SDValue();
28158
28159 // TODO: Handle extended types?
28160 EVT VT = Op.getValueType();
28161 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28162 VT.getScalarType() != MVT::f64)
28163 return SDValue();
28164
28165 // If estimates are explicitly disabled for this function, we're done.
28166 MachineFunction &MF = DAG.getMachineFunction();
28167 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28168 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28169 return SDValue();
28170
28171 // Estimates may be explicitly enabled for this type with a custom number of
28172 // refinement steps.
28173 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28174
28175 bool UseOneConstNR = false;
28176 if (SDValue Est =
28177 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28178 Reciprocal)) {
28179 AddToWorklist(Est.getNode());
28180
28181 if (Iterations > 0)
28182 Est = UseOneConstNR
28183 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28184 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28185 if (!Reciprocal) {
28186 SDLoc DL(Op);
28187 // Try the target specific test first.
28188 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28189
28190 // The estimate is now completely wrong if the input was exactly 0.0 or
28191 // possibly a denormal. Force the answer to 0.0 or value provided by
28192 // target for those cases.
28193 Est = DAG.getNode(
28194 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28195 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28196 }
28197 return Est;
28198 }
28199
28200 return SDValue();
28201}
28202
28203SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28204 return buildSqrtEstimateImpl(Op, Flags, true);
28205}
28206
28207SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28208 return buildSqrtEstimateImpl(Op, Flags, false);
28209}
28210
28211/// Return true if there is any possibility that the two addresses overlap.
28212bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28213
28214 struct MemUseCharacteristics {
28215 bool IsVolatile;
28216 bool IsAtomic;
28217 SDValue BasePtr;
28218 int64_t Offset;
28219 LocationSize NumBytes;
28220 MachineMemOperand *MMO;
28221 };
28222
28223 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28224 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28225 int64_t Offset = 0;
28226 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28227 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28228 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28229 ? -1 * C->getSExtValue()
28230 : 0;
28231 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28232 return {LSN->isVolatile(), LSN->isAtomic(),
28233 LSN->getBasePtr(), Offset /*base offset*/,
28234 LocationSize::precise(Size), LSN->getMemOperand()};
28235 }
28236 if (const auto *LN = cast<LifetimeSDNode>(N))
28237 return {false /*isVolatile*/,
28238 /*isAtomic*/ false,
28239 LN->getOperand(1),
28240 (LN->hasOffset()) ? LN->getOffset() : 0,
28241 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28242 : LocationSize::beforeOrAfterPointer(),
28243 (MachineMemOperand *)nullptr};
28244 // Default.
28245 return {false /*isvolatile*/,
28246 /*isAtomic*/ false,
28247 SDValue(),
28248 (int64_t)0 /*offset*/,
28249 LocationSize::beforeOrAfterPointer() /*size*/,
28250 (MachineMemOperand *)nullptr};
28251 };
28252
28253 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28254 MUC1 = getCharacteristics(Op1);
28255
28256 // If they are to the same address, then they must be aliases.
28257 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28258 MUC0.Offset == MUC1.Offset)
28259 return true;
28260
28261 // If they are both volatile then they cannot be reordered.
28262 if (MUC0.IsVolatile && MUC1.IsVolatile)
28263 return true;
28264
28265 // Be conservative about atomics for the moment
28266 // TODO: This is way overconservative for unordered atomics (see D66309)
28267 if (MUC0.IsAtomic && MUC1.IsAtomic)
28268 return true;
28269
28270 if (MUC0.MMO && MUC1.MMO) {
28271 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28272 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28273 return false;
28274 }
28275
28276 // If NumBytes is scalable and offset is not 0, conservatively return may
28277 // alias
28278 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28279 MUC0.Offset != 0) ||
28280 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28281 MUC1.Offset != 0))
28282 return true;
28283 // Try to prove that there is aliasing, or that there is no aliasing. Either
28284 // way, we can return now. If nothing can be proved, proceed with more tests.
28285 bool IsAlias;
28286 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28287 DAG, IsAlias))
28288 return IsAlias;
28289
28290 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28291 // either are not known.
28292 if (!MUC0.MMO || !MUC1.MMO)
28293 return true;
28294
28295 // If one operation reads from invariant memory, and the other may store, they
28296 // cannot alias. These should really be checking the equivalent of mayWrite,
28297 // but it only matters for memory nodes other than load/store.
28298 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28299 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28300 return false;
28301
28302 // If we know that SrcValue1 and SrcValue2 have relatively large
28303 // alignment compared to the size and offset of the access, we may be able
28304 // to prove they do not alias. This check is conservative for now to catch
28305 // cases created by splitting vector types; it only works when the offsets
28306 // are multiples of the size of the data.
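  // As an illustration (made-up numbers): two 4-byte accesses whose base
  // values are both 16-byte aligned, at source-value offsets 0 and 8, give
  // OffAlign0 = 0 and OffAlign1 = 8; since 0 + 4 <= 8 the ranges cannot
  // overlap and the check below reports no alias.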
28307 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28308 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28309 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28310 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28311 LocationSize Size0 = MUC0.NumBytes;
28312 LocationSize Size1 = MUC1.NumBytes;
28313
28314 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28315 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28316 !Size1.isScalable() && Size0 == Size1 &&
28317 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28318 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28319 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28320 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28321 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28322
28323 // There is no overlap between these relatively aligned accesses of
28324 // similar size. Return no alias.
28325 if ((OffAlign0 + static_cast<int64_t>(
28326 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28327 (OffAlign1 + static_cast<int64_t>(
28328 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28329 return false;
28330 }
28331
28332 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28333 ? CombinerGlobalAA
28334 : DAG.getSubtarget().useAA();
28335#ifndef NDEBUG
28336 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28337 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28338 UseAA = false;
28339#endif
28340
28341 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28342 Size0.hasValue() && Size1.hasValue() &&
28343 // Can't represent a scalable size + fixed offset in LocationSize
28344 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28345 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28346 // Use alias analysis information.
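    // Overlap0/Overlap1 below grow each access size by its distance from the
    // smaller of the two source-value offsets, so both MemoryLocations cover
    // their access plus the gap back to that common starting point.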
28347 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28348 int64_t Overlap0 =
28349 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28350 int64_t Overlap1 =
28351 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28352 LocationSize Loc0 =
28353 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28354 LocationSize Loc1 =
28355 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28356 if (AA->isNoAlias(
28357 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28358 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28359 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28360 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28361 return false;
28362 }
28363
28364 // Otherwise we have to assume they alias.
28365 return true;
28366}
28367
28368/// Walk up chain skipping non-aliasing memory nodes,
28369/// looking for aliasing nodes and adding them to the Aliases vector.
28370void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28371 SmallVectorImpl<SDValue> &Aliases) {
28372 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28373 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28374
28375 // Get alias information for node.
28376 // TODO: relax aliasing for unordered atomics (see D66309)
28377 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28378
28379 // Starting off.
28380 Chains.push_back(OriginalChain);
28381 unsigned Depth = 0;
28382
28383 // Attempt to improve chain by a single step
28384 auto ImproveChain = [&](SDValue &C) -> bool {
28385 switch (C.getOpcode()) {
28386 case ISD::EntryToken:
28387 // No need to mark EntryToken.
28388 C = SDValue();
28389 return true;
28390 case ISD::LOAD:
28391 case ISD::STORE: {
28392 // Get alias information for C.
28393 // TODO: Relax aliasing for unordered atomics (see D66309)
28394 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28395 cast<LSBaseSDNode>(C.getNode())->isSimple();
28396 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28397 // Look further up the chain.
28398 C = C.getOperand(0);
28399 return true;
28400 }
28401 // Alias, so stop here.
28402 return false;
28403 }
28404
28405 case ISD::CopyFromReg:
28406 // Always forward past CopyFromReg.
28407 C = C.getOperand(0);
28408 return true;
28409
28410 case ISD::LIFETIME_START:
28411 case ISD::LIFETIME_END: {
28412 // We can forward past any lifetime start/end that can be proven not to
28413 // alias the memory access.
28414 if (!mayAlias(N, C.getNode())) {
28415 // Look further up the chain.
28416 C = C.getOperand(0);
28417 return true;
28418 }
28419 return false;
28420 }
28421 default:
28422 return false;
28423 }
28424 };
28425
28426 // Look at each chain and determine if it is an alias. If so, add it to the
28427 // aliases list. If not, then continue up the chain looking for the next
28428 // candidate.
28429 while (!Chains.empty()) {
28430 SDValue Chain = Chains.pop_back_val();
28431
28432 // Don't bother if we've seen Chain before.
28433 if (!Visited.insert(Chain.getNode()).second)
28434 continue;
28435
28436 // For TokenFactor nodes, look at each operand and only continue up the
28437 // chain until we reach the depth limit.
28438 //
28439 // FIXME: The depth check could be made to return the last non-aliasing
28440 // chain we found before we hit a tokenfactor rather than the original
28441 // chain.
28442 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28443 Aliases.clear();
28444 Aliases.push_back(OriginalChain);
28445 return;
28446 }
28447
28448 if (Chain.getOpcode() == ISD::TokenFactor) {
28449 // We have to check each of the operands of the token factor for "small"
28450 // token factors, so we queue them up. Adding the operands to the queue
28451 // (stack) in reverse order maintains the original order and increases the
28452 // likelihood that getNode will find a matching token factor (CSE.)
28453 if (Chain.getNumOperands() > 16) {
28454 Aliases.push_back(Chain);
28455 continue;
28456 }
28457 for (unsigned n = Chain.getNumOperands(); n;)
28458 Chains.push_back(Chain.getOperand(--n));
28459 ++Depth;
28460 continue;
28461 }
28462 // Everything else
28463 if (ImproveChain(Chain)) {
28464 // Updated Chain Found, Consider new chain if one exists.
28465 if (Chain.getNode())
28466 Chains.push_back(Chain);
28467 ++Depth;
28468 continue;
28469 }
28470 // No Improved Chain Possible, treat as Alias.
28471 Aliases.push_back(Chain);
28472 }
28473}
28474
28475/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28476/// (aliasing node.)
28477SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28478 if (OptLevel == CodeGenOptLevel::None)
28479 return OldChain;
28480
28481 // Ops for replacing token factor.
28482 SmallVector<SDValue, 8> Aliases;
28483
28484 // Accumulate all the aliases to this node.
28485 GatherAllAliases(N, OldChain, Aliases);
28486
28487 // If no operands then chain to entry token.
28488 if (Aliases.empty())
28489 return DAG.getEntryNode();
28490
28491 // If a single operand then chain to it. We don't need to revisit it.
28492 if (Aliases.size() == 1)
28493 return Aliases[0];
28494
28495 // Construct a custom tailored token factor.
28496 return DAG.getTokenFactor(SDLoc(N), Aliases);
28497}
28498
28499// This function tries to collect a bunch of potentially interesting
28500// nodes to improve the chains of, all at once. This might seem
28501// redundant, as this function gets called when visiting every store
28502// node, so why not let the work be done on each store as it's visited?
28503//
28504// I believe this is mainly important because mergeConsecutiveStores
28505// is unable to deal with merging stores of different sizes, so unless
28506// we improve the chains of all the potential candidates up-front
28507// before running mergeConsecutiveStores, it might only see some of
28508// the nodes that will eventually be candidates, and then not be able
28509// to go from a partially-merged state to the desired final
28510// fully-merged state.
28511
28512bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28513 SmallVector<StoreSDNode *, 8> ChainedStores;
28514 StoreSDNode *STChain = St;
28515 // Intervals records which offsets from BaseIndex have been covered. In
28516 // the common case, every store writes to the immediately preceding address
28517 // range and is thus merged with the previous interval at insertion time.
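  // For example (illustrative offsets): a 4-byte St covers [0, 4); an earlier
  // chained 4-byte store at relative offset -4 adds [-4, 0) and coalesces with
  // it, while one at offset -2 would overlap [0, 4) and end the walk below.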
28518
28519 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28520 IntervalMapHalfOpenInfo<int64_t>>;
28521 IMap::Allocator A;
28522 IMap Intervals(A);
28523
28524 // This holds the base pointer, index, and the offset in bytes from the base
28525 // pointer.
28526 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28527
28528 // We must have a base and an offset.
28529 if (!BasePtr.getBase().getNode())
28530 return false;
28531
28532 // Do not handle stores to undef base pointers.
28533 if (BasePtr.getBase().isUndef())
28534 return false;
28535
28536 // Do not handle stores to opaque types
28537 if (St->getMemoryVT().isZeroSized())
28538 return false;
28539
28540 // BaseIndexOffset assumes that offsets are fixed-size, which
28541 // is not valid for scalable vectors where the offsets are
28542 // scaled by `vscale`, so bail out early.
28543 if (St->getMemoryVT().isScalableVT())
28544 return false;
28545
28546 // Add ST's interval.
28547 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28548 std::monostate{});
28549
28550 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28551 if (Chain->getMemoryVT().isScalableVector())
28552 return false;
28553
28554 // If the chain has more than one use, then we can't reorder the mem ops.
28555 if (!SDValue(Chain, 0)->hasOneUse())
28556 break;
28557 // TODO: Relax for unordered atomics (see D66309)
28558 if (!Chain->isSimple() || Chain->isIndexed())
28559 break;
28560
28561 // Find the base pointer and offset for this memory node.
28562 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28563 // Check that the base pointer is the same as the original one.
28564 int64_t Offset;
28565 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28566 break;
28567 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28568 // Make sure we don't overlap with other intervals by checking the ones to
28569 // the left or right before inserting.
28570 auto I = Intervals.find(Offset);
28571 // If there's a next interval, we should end before it.
28572 if (I != Intervals.end() && I.start() < (Offset + Length))
28573 break;
28574 // If there's a previous interval, we should start after it.
28575 if (I != Intervals.begin() && (--I).stop() <= Offset)
28576 break;
28577 Intervals.insert(Offset, Offset + Length, std::monostate{});
28578
28579 ChainedStores.push_back(Chain);
28580 STChain = Chain;
28581 }
28582
28583 // If we didn't find a chained store, exit.
28584 if (ChainedStores.empty())
28585 return false;
28586
28587 // Improve all chained stores (St and ChainedStores members) starting from
28588 // where the store chain ended and return a single TokenFactor.
28589 SDValue NewChain = STChain->getChain();
28590 SmallVector<SDValue, 8> TFOps;
28591 for (unsigned I = ChainedStores.size(); I;) {
28592 StoreSDNode *S = ChainedStores[--I];
28593 SDValue BetterChain = FindBetterChain(S, NewChain);
28594 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28595 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28596 TFOps.push_back(SDValue(S, 0));
28597 ChainedStores[I] = S;
28598 }
28599
28600 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28601 SDValue BetterChain = FindBetterChain(St, NewChain);
28602 SDValue NewST;
28603 if (St->isTruncatingStore())
28604 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28605 St->getBasePtr(), St->getMemoryVT(),
28606 St->getMemOperand());
28607 else
28608 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28609 St->getBasePtr(), St->getMemOperand());
28610
28611 TFOps.push_back(NewST);
28612
28613 // If we improved every element of TFOps, then we've lost the dependence on
28614 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28615 // the beginning to keep relative order consistent with FindBetterChains.
28616 auto hasImprovedChain = [&](SDValue ST) -> bool {
28617 return ST->getOperand(0) != NewChain;
28618 };
28619 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28620 if (AddNewChain)
28621 TFOps.insert(TFOps.begin(), NewChain);
28622
28623 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28624 CombineTo(St, TF);
28625
28626 // Add TF and its operands to the worklist.
28627 AddToWorklist(TF.getNode());
28628 for (const SDValue &Op : TF->ops())
28629 AddToWorklist(Op.getNode());
28630 AddToWorklist(STChain);
28631 return true;
28632}
28633
28634bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28635 if (OptLevel == CodeGenOptLevel::None)
28636 return false;
28637
28638 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28639
28640 // We must have a base and an offset.
28641 if (!BasePtr.getBase().getNode())
28642 return false;
28643
28644 // Do not handle stores to undef base pointers.
28645 if (BasePtr.getBase().isUndef())
28646 return false;
28647
28648 // Directly improve a chain of disjoint stores starting at St.
28649 if (parallelizeChainedStores(St))
28650 return true;
28651
28652 // Improve St's chain.
28653 SDValue BetterChain = FindBetterChain(St, St->getChain());
28654 if (St->getChain() != BetterChain) {
28655 replaceStoreChain(St, BetterChain);
28656 return true;
28657 }
28658 return false;
28659}
28660
28661/// This is the entry point for the file.
28662 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28663 CodeGenOptLevel OptLevel) {
28664 /// This is the main entry point to this class.
28665 DAGCombiner(*this, AA, OptLevel).Run(Level);
28666}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:531
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1113
bool isNegative() const
Definition: APFloat.h:1348
bool isNormal() const
Definition: APFloat.h:1352
bool isDenormal() const
Definition: APFloat.h:1349
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1331
const fltSemantics & getSemantics() const
Definition: APFloat.h:1356
bool isNaN() const
Definition: APFloat.h:1346
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:991
APInt bitcastToAPInt() const
Definition: APFloat.h:1254
bool isLargest() const
Definition: APFloat.h:1364
bool isIEEE() const
Definition: APFloat.h:1366
bool isInfinity() const
Definition: APFloat.h:1345
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:213
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:428
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:208
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1628
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1471
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1309
APInt abs() const
Get the absolute value.
Definition: APInt.h:1752
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:350
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1161
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:237
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:445
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1447
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1090
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:188
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:308
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1228
int32_t exactLogBase2() const
Definition: APInt.h:1740
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1597
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1556
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1490
unsigned countLeadingZeros() const
Definition: APInt.h:1564
unsigned logBase2() const
Definition: APInt.h:1718
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:489
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:454
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:450
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:467
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1129
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1236
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:419
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:285
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:275
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:179
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:368
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:265
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:218
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1521
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:837
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1614
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:345
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:350
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
Get the SDNode which holds the desired result.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
Get the index which selects a specific result in the SDNode.
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:364
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:968
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:565
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:488
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
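A minimal sketch of building a compare with getSetCC, assuming hypothetical locals DAG, DL, TLI, and an integer SDValue X:
// Compare X against zero, using the result type the target prefers for setcc.
EVT OpVT = X.getValueType();
EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OpVT);
SDValue Zero = DAG.getConstant(0, DL, OpVT);
SDValue IsNonZero = DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETNE);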
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
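A minimal sketch of the classic use of this query inside an ADD combine, assuming locals DAG, DL, VT, N0, N1:
// If the operands of an add share no set bits, the add cannot carry and can
// be replaced by an or.
if (DAG.haveNoCommonBitsSet(N0, N1))
  return DAG.getNode(ISD::OR, DL, VT, N0, N1);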
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given floating-point value is known to never be an ordered negative value.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
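A minimal sketch showing the documented expansion, assuming locals DAG, DL, Val, VT:
SDValue NotVal = DAG.getNOT(DL, Val, VT);
// Equivalent to building the XOR by hand:
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
SDValue NotVal2 = DAG.getNode(ISD::XOR, DL, VT, Val, AllOnes);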
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
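A minimal sketch of materializing a constant splat via getBuildVector, assuming locals DAG, DL, a fixed-width vector EVT VT, and a uint64_t C:
// Build <C, C, ..., C> as an ISD::BUILD_VECTOR of identical scalar constants.
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(),
                            DAG.getConstant(C, DL, EltVT));
SDValue SplatC = DAG.getBuildVector(VT, DL, Ops);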
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
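A minimal sketch of merging the chains of several memory operations, assuming locals DAG, DL and a SmallVector<SDValue, 8> Chains already populated with their chain results:
// Collapse all incoming chains into a single token that a merged load/store
// can use as its chain operand.
SDValue MergedChain = DAG.getTokenFactor(DL, Chains);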
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
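A minimal sketch of computing the address of the high half when splitting a memory access, assuming locals DAG, DL, an SDValue Ptr, and an unsigned HalfBytes:
// Address of the second half: Ptr + HalfBytes, via the pointer-arithmetic
// helper so pointer types and flags are handled uniformly.
SDValue HiPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfBytes), DL);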
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:557
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:493
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
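A minimal sketch of rewiring a load's users after forming a replacement, assuming locals DAG, a LoadSDNode *LD, and an SDValue NewLoad whose results mirror the original (value 0 = data, value 1 = chain):
// Redirect only the two results of the old load, leaving other nodes intact.
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), NewLoad.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));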
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
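A minimal sketch of proving that the low bits of an operand are zero, either with this convenience query or with the underlying computeKnownBits; locals DAG, SDValue Op, and unsigned LowBits are assumed:
APInt LowMask = APInt::getLowBitsSet(Op.getScalarValueSizeInBits(), LowBits);
bool LowAreZero = DAG.MaskedValueIsZero(Op, LowMask);
// The same fact via the raw known-bits analysis:
KnownBits Known = DAG.computeKnownBits(Op);
bool LowAreZeroToo = Known.Zero.extractBits(LowBits, 0).isAllOnes();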
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:574
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:922
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
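A minimal sketch of swapping shuffle operands while keeping the semantics, assuming locals DAG, DL, VT, a ShuffleVectorSDNode *SVN, and its operands N0, N1:
// Copy the mask, rewrite it for swapped inputs, then rebuild the shuffle.
SmallVector<int, 16> NewMask(SVN->getMask().begin(), SVN->getMask().end());
ShuffleVectorSDNode::commuteMask(NewMask);
SDValue Commuted = DAG.getVectorShuffle(VT, DL, N1, N0, NewMask);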
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:418
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
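A minimal sketch of the usual legality guard before forming a new node, assuming locals TLI, DAG, DL, VT, N0, N1 and a bool LegalOperations matching the current combine level:
// Only introduce FMINNUM when pre-legalization, or when the target can
// select or custom-lower it.
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
  return DAG.getNode(ISD::FMINNUM, DL, VT, N0, N1);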
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Return true if it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the DAG combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
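A minimal sketch of driving this entry point directly, assuming locals TLI, DAG, an SDValue Op0 feeding an AND with a ConstantSDNode mask *C, and bools LegalTypes/LegalOperations:
// Ask the helper to rewrite Op0 given that only the bits set in the AND mask
// are observed; on success the suggested replacement is recorded in TLO.
APInt DemandedBits = C->getAPIntValue();
APInt DemandedElts = APInt(1, 1); // scalar operation
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
if (TLI.SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO))
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);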
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2193
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2198
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2203
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2208
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:811
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:764
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:737
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:484
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1391
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:567
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:728
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1262
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:495
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:797
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:491
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:804
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:551
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1376
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1380
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:702
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:834
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1390
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:485
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:927
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1431
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:788
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:670
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:628
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1373
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:736
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1377
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:772
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:944
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1098
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:654
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:741
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1258
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1392
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:635
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1385
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:659
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:608
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:581
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:999
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:794
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:870
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:756
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1350
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:986
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:823
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:682
@ LIFETIME_START
This corresponds to the llvm.lifetime.* intrinsics.
Definition: ISDOpcodes.h:1325
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:902
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:750
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1212
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1393
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:936
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1005
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1028
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:665
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1374
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:959
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:845
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:869
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1381
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:800
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1091
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:501
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1161
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1033
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:523
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1520
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1630
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
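For illustration, a minimal sketch of how a combine might use this hook; Op is a hypothetical SDValue and the power-of-two lambda is only an example, not taken from this file:
  // Accept Op only when every lane is a constant power of two (undef lanes allowed).
  auto IsPow2 = [](ConstantSDNode *C) {
    return C && C->getAPIntValue().isPowerOf2();
  };
  bool AllLanesPow2 = ISD::matchUnaryPredicate(Op, IsPow2, /*AllowUndefs=*/true);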
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1605
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1625
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is a bitwise logic opcode.
Definition: ISDOpcodes.h:1446
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
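A sketch of how these two queries are typically combined; N is a hypothetical VP-opcode SDNode and the variable names are illustrative:
  // Fetch the mask and explicit-vector-length operands of a VP node, if present.
  SDValue Mask, EVL;
  if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
    Mask = N->getOperand(*MaskIdx);
  if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
    EVL = N->getOperand(*EVLIdx);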
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
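A small sketch deriving related condition codes from an existing one; CC and OpVT are hypothetical, and both calls are the ones declared above:
  // For a comparison (X CC Y) of operand type OpVT:
  ISD::CondCode InvCC  = ISD::getSetCCInverse(CC, OpVT);    // !(X CC Y)
  ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);  // (Y CC X)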
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1516
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1516
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1587
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a constant splat BUILD_VECTOR or SPLAT_VECTOR, storing the splat value in SplatValue.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
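A minimal sketch of pairing two operands through this hook; LHS and RHS are hypothetical SDValues and the unsigned-greater-or-equal test is only an example:
  // True when, lane by lane, the LHS constant is >= the RHS constant (unsigned).
  auto UGE = [](ConstantSDNode *L, ConstantSDNode *R) {
    return L->getAPIntValue().uge(R->getAPIntValue());
  };
  bool AllUGE = ISD::matchBinaryPredicate(LHS, RHS, UGE);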
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1503
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1534
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1599
@ VecLoad
Definition: NVPTX.h:88
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
specificval_ty m_Specific(const Value *V)
Match only the specific Value given as V.
Definition: PatternMatch.h:875
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
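A minimal IR-level sketch combining these matchers; V is a hypothetical llvm::Value*, PatternMatch::match is the standard entry point, and the "square plus one" shape is purely illustrative:
  using namespace llvm::PatternMatch;
  // Recognise (X * X) + 1, binding X. m_Deferred reuses the value bound by the
  // earlier m_Value within the same match() call.
  Value *X = nullptr;
  bool IsSquarePlusOne =
      match(V, m_Add(m_Mul(m_Value(X), m_Deferred(X)), m_One()));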
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or a splat of an integer constant.
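A DAG-level sketch in the same spirit; N is a hypothetical SDNode*, DAG the current SelectionDAG, and m_Value here is assumed to be SDPatternMatch's any-value matcher available alongside the matchers listed above:
  using namespace llvm::SDPatternMatch;
  // True when N is a right shift (logical or arithmetic) by a constant amount,
  // e.g. (srl X, C) or (sra X, C).
  bool IsConstRightShift =
      sd_match(N, &DAG, m_AnyOf(m_Srl(m_Value(), m_ConstInt()),
                                m_Sra(m_Value(), m_ConstInt())));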
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4590
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::Argument NV
The llvm namespace; all of the LLVM APIs listed below live in it.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:353
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1540
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
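A one-line sketch of the typical use; V is a hypothetical SDValue:
  // Look past any chain of bitcasts to see what actually produces the bits.
  SDValue Src = peekThroughBitcasts(V);
  bool FromBuildVector = Src.getOpcode() == ISD::BUILD_VECTOR;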
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2058
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:511
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
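A short sketch of the widening transform; Mask is a hypothetical shuffle mask over narrow elements, and a Scale of 2 merges adjacent lane pairs:
  // Succeeds only if every adjacent pair of mask entries moves together, in
  // which case ScaledMask describes the same shuffle on elements twice as wide.
  SmallVector<int, 8> ScaledMask;
  bool CanWiden = widenShuffleMaskElts(/*Scale=*/2, Mask, ScaledMask);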
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1522
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
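A small arithmetic sketch combining this with the other power-of-two helpers above; Bytes is a hypothetical store size:
  // Round a byte count up to the next power of two and take its log2, e.g. 6 -> 8 -> 3.
  uint64_t Pow2Bytes = PowerOf2Ceil(Bytes);
  unsigned ShiftAmt  = Log2_64(Pow2Bytes);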
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1490
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
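A sketch of guarding a fold on the splat value; V is a hypothetical SDValue and the 8-bit range check is only an example:
  // Fold only when V is a constant, or a splat of one, that fits in 8 bits.
  if (ConstantSDNode *C = isConstOrConstSplat(V, /*AllowUndefs=*/false))
    if (C->getAPIntValue().isIntN(8)) {
      // ... perform the narrow-constant fold here ...
    }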
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:325
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:246
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:321
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:317
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:262
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:331
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:269
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:258
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
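A brief sketch tying a few of these EVT queries together; VT is a hypothetical fixed-width vector type, Ctx an LLVMContext, and the v4f32 round trip is illustrative:
  // For, say, VT = v4f32: the same bits as one scalar integer, and as an
  // integer vector with the same lane count.
  EVT WideIntVT = EVT::getIntegerVT(Ctx, VT.getFixedSizeInBits());  // i128
  EVT IntVecVT  = VT.changeVectorElementTypeToInteger();            // v4i32
  bool SameSize = WideIntVT.getSizeInBits() == IntVecVT.getSizeInBits();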
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:79
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
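A short sketch of consuming these KnownBits queries; Known would typically come from SelectionDAG::computeKnownBits on some operand, and the uses below are illustrative:
  // An upper bound on how many low bits may be needed to hold the value ...
  unsigned MaxActive = Known.countMaxActiveBits();
  // ... and, when every bit is known, the exact value.
  if (Known.isConstant()) {
    const APInt &Exact = Known.getConstant();
    (void)Exact;
  }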
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:310
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...