DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of loads sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG. For fast lookup and
174 /// deduplication, the index of the node in this vector is stored in the
175 /// node in SDNode::CombinerWorklistIndex.
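/// A negative index means the node is not currently on the worklist: -1 if it
/// was never added or has been removed, -2 if it has already been combined
/// (see AddToWorklist and getNextWorklistEntry below).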
176 SmallVector<SDNode *, 64> Worklist;
177
178 /// This records all nodes attempted to be added to the worklist since we
179 /// considered a new worklist entry. As we do not add duplicate nodes
180 /// to the worklist, this is different from the tail of the worklist.
181 SmallSetVector<SDNode *, 32> PruningList;
182
183 /// Map from candidate StoreNode to the pair of RootNode and count.
184 /// The count is used to track how many times we have seen the StoreNode
185 /// with the same RootNode bail out in dependence check. If we have seen
186 /// the bail out for the same pair many times over a limit, we won't
187 /// consider the StoreNode with the same RootNode as store merging
188 /// candidate again.
189 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
190
191 // AA - Used for DAG load/store alias analysis.
192 AliasAnalysis *AA;
193
194 /// When an instruction is simplified, add all users of the instruction to
195 /// the work lists because they might get more simplified now.
196 void AddUsersToWorklist(SDNode *N) {
197 for (SDNode *Node : N->uses())
198 AddToWorklist(Node);
199 }
200
201 /// Convenient shorthand to add a node and all of its user to the worklist.
202 void AddToWorklistWithUsers(SDNode *N) {
203 AddUsersToWorklist(N);
204 AddToWorklist(N);
205 }
206
207 // Prune potentially dangling nodes. This is called after
208 // any visit to a node, but should also be called during a visit after any
209 // failed combine which may have created a DAG node.
210 void clearAddedDanglingWorklistEntries() {
211 // Check any nodes added to the worklist to see if they are prunable.
212 while (!PruningList.empty()) {
213 auto *N = PruningList.pop_back_val();
214 if (N->use_empty())
215 recursivelyDeleteUnusedNodes(N);
216 }
217 }
218
219 SDNode *getNextWorklistEntry() {
220 // Before we do any work, remove nodes that are not in use.
221 clearAddedDanglingWorklistEntries();
222 SDNode *N = nullptr;
223 // The Worklist holds the SDNodes in order, but it may contain null
224 // entries.
225 while (!N && !Worklist.empty()) {
226 N = Worklist.pop_back_val();
227 }
228
229 if (N) {
230 assert(N->getCombinerWorklistIndex() >= 0 &&
231 "Found a worklist entry without a corresponding map entry!");
232 // Set to -2 to indicate that we combined the node.
233 N->setCombinerWorklistIndex(-2);
234 }
235 return N;
236 }
237
238 /// Call the node-specific routine that folds each particular type of node.
239 SDValue visit(SDNode *N);
240
241 public:
242 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
243 : DAG(D), TLI(D.getTargetLoweringInfo()),
244 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
245 ForCodeSize = DAG.shouldOptForSize();
246 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
247
248 MaximumLegalStoreInBits = 0;
249 // We use the minimum store size here, since that's all we can guarantee
250 // for the scalable vector types.
251 for (MVT VT : MVT::all_valuetypes())
252 if (EVT(VT).isSimple() && VT != MVT::Other &&
253 TLI.isTypeLegal(EVT(VT)) &&
254 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
255 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
256 }
257
258 void ConsiderForPruning(SDNode *N) {
259 // Mark this for potential pruning.
260 PruningList.insert(N);
261 }
262
263 /// Add to the worklist making sure its instance is at the back (next to be
264 /// processed).
265 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true,
266 bool SkipIfCombinedBefore = false) {
267 assert(N->getOpcode() != ISD::DELETED_NODE &&
268 "Deleted Node added to Worklist");
269
270 // Skip handle nodes as they can't usefully be combined and confuse the
271 // zero-use deletion strategy.
272 if (N->getOpcode() == ISD::HANDLENODE)
273 return;
274
275 if (SkipIfCombinedBefore && N->getCombinerWorklistIndex() == -2)
276 return;
277
278 if (IsCandidateForPruning)
279 ConsiderForPruning(N);
280
281 if (N->getCombinerWorklistIndex() < 0) {
282 N->setCombinerWorklistIndex(Worklist.size());
283 Worklist.push_back(N);
284 }
285 }
286
287 /// Remove all instances of N from the worklist.
288 void removeFromWorklist(SDNode *N) {
289 PruningList.remove(N);
290 StoreRootCountMap.erase(N);
291
292 int WorklistIndex = N->getCombinerWorklistIndex();
293 // If not in the worklist, the index might be -1 or -2 (was combined
294 // before). As the node gets deleted anyway, there's no need to update
295 // the index.
296 if (WorklistIndex < 0)
297 return; // Not in the worklist.
298
299 // Null out the entry rather than erasing it to avoid a linear operation.
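// getNextWorklistEntry() skips over these null entries when popping.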
300 Worklist[WorklistIndex] = nullptr;
301 N->setCombinerWorklistIndex(-1);
302 }
303
304 void deleteAndRecombine(SDNode *N);
305 bool recursivelyDeleteUnusedNodes(SDNode *N);
306
307 /// Replaces all uses of the results of one DAG node with new values.
308 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
309 bool AddTo = true);
310
311 /// Replaces all uses of the results of one DAG node with new values.
312 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
313 return CombineTo(N, &Res, 1, AddTo);
314 }
315
316 /// Replaces all uses of the results of one DAG node with new values.
317 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
318 bool AddTo = true) {
319 SDValue To[] = { Res0, Res1 };
320 return CombineTo(N, To, 2, AddTo);
321 }
322
323 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
324
325 private:
326 unsigned MaximumLegalStoreInBits;
327
328 /// Check the specified integer node value to see if it can be simplified or
329 /// if things it uses can be simplified by bit propagation.
330 /// If so, return true.
331 bool SimplifyDemandedBits(SDValue Op) {
332 unsigned BitWidth = Op.getScalarValueSizeInBits();
333 APInt DemandedBits = APInt::getAllOnes(BitWidth);
334 return SimplifyDemandedBits(Op, DemandedBits);
335 }
336
337 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
338 EVT VT = Op.getValueType();
339 APInt DemandedElts = VT.isFixedLengthVector()
340 ? APInt::getAllOnes(VT.getVectorNumElements())
341 : APInt(1, 1);
342 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
343 }
344
345 /// Check the specified vector node value to see if it can be simplified or
346 /// if things it uses can be simplified as it only uses some of the
347 /// elements. If so, return true.
348 bool SimplifyDemandedVectorElts(SDValue Op) {
349 // TODO: For now just pretend it cannot be simplified.
350 if (Op.getValueType().isScalableVector())
351 return false;
352
353 unsigned NumElts = Op.getValueType().getVectorNumElements();
354 APInt DemandedElts = APInt::getAllOnes(NumElts);
355 return SimplifyDemandedVectorElts(Op, DemandedElts);
356 }
357
358 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
359 const APInt &DemandedElts,
360 bool AssumeSingleUse = false);
361 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
362 bool AssumeSingleUse = false);
363
364 bool CombineToPreIndexedLoadStore(SDNode *N);
365 bool CombineToPostIndexedLoadStore(SDNode *N);
366 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
367 bool SliceUpLoad(SDNode *N);
368
369 // Looks up the chain to find a unique (unaliased) store feeding the passed
370 // load. If no such store is found, returns a nullptr.
371 // Note: This will look past a CALLSEQ_START if the load is chained to it,
372 // so that it can find stack stores for byval params.
373 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
374 // Scalars have size 0 to distinguish from singleton vectors.
375 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
376 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
377 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
378
379 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
380 /// load.
381 ///
382 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
383 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
384 /// \param EltNo index of the vector element to load.
385 /// \param OriginalLoad load that EVE came from to be replaced.
386 /// \returns EVE on success, SDValue() on failure.
387 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
388 SDValue EltNo,
389 LoadSDNode *OriginalLoad);
390 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
391 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
392 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
393 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
394 SDValue PromoteIntBinOp(SDValue Op);
395 SDValue PromoteIntShiftOp(SDValue Op);
396 SDValue PromoteExtend(SDValue Op);
397 bool PromoteLoad(SDValue Op);
398
399 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
400 SDValue RHS, SDValue True, SDValue False,
401 ISD::CondCode CC);
402
403 /// Call the node-specific routine that knows how to fold each
404 /// particular type of node. If that doesn't do anything, try the
405 /// target-specific DAG combines.
406 SDValue combine(SDNode *N);
407
408 // Visitation implementation - Implement dag node combining for different
409 // node types. The semantics are as follows:
410 // Return Value:
411 // SDValue.getNode() == 0 - No change was made
412 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
413 // otherwise - N should be replaced by the returned Operand.
414 //
415 SDValue visitTokenFactor(SDNode *N);
416 SDValue visitMERGE_VALUES(SDNode *N);
417 SDValue visitADD(SDNode *N);
418 SDValue visitADDLike(SDNode *N);
419 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
420 SDValue visitSUB(SDNode *N);
421 SDValue visitADDSAT(SDNode *N);
422 SDValue visitSUBSAT(SDNode *N);
423 SDValue visitADDC(SDNode *N);
424 SDValue visitADDO(SDNode *N);
425 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
426 SDValue visitSUBC(SDNode *N);
427 SDValue visitSUBO(SDNode *N);
428 SDValue visitADDE(SDNode *N);
429 SDValue visitUADDO_CARRY(SDNode *N);
430 SDValue visitSADDO_CARRY(SDNode *N);
431 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
432 SDNode *N);
433 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
434 SDNode *N);
435 SDValue visitSUBE(SDNode *N);
436 SDValue visitUSUBO_CARRY(SDNode *N);
437 SDValue visitSSUBO_CARRY(SDNode *N);
438 template <class MatchContextClass> SDValue visitMUL(SDNode *N);
439 SDValue visitMULFIX(SDNode *N);
440 SDValue useDivRem(SDNode *N);
441 SDValue visitSDIV(SDNode *N);
442 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
443 SDValue visitUDIV(SDNode *N);
444 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
445 SDValue visitREM(SDNode *N);
446 SDValue visitMULHU(SDNode *N);
447 SDValue visitMULHS(SDNode *N);
448 SDValue visitAVG(SDNode *N);
449 SDValue visitABD(SDNode *N);
450 SDValue visitSMUL_LOHI(SDNode *N);
451 SDValue visitUMUL_LOHI(SDNode *N);
452 SDValue visitMULO(SDNode *N);
453 SDValue visitIMINMAX(SDNode *N);
454 SDValue visitAND(SDNode *N);
455 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
456 SDValue visitOR(SDNode *N);
457 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
458 SDValue visitXOR(SDNode *N);
459 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
460 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
461 SDValue visitSHL(SDNode *N);
462 SDValue visitSRA(SDNode *N);
463 SDValue visitSRL(SDNode *N);
464 SDValue visitFunnelShift(SDNode *N);
465 SDValue visitSHLSAT(SDNode *N);
466 SDValue visitRotate(SDNode *N);
467 SDValue visitABS(SDNode *N);
468 SDValue visitBSWAP(SDNode *N);
469 SDValue visitBITREVERSE(SDNode *N);
470 SDValue visitCTLZ(SDNode *N);
471 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
472 SDValue visitCTTZ(SDNode *N);
473 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
474 SDValue visitCTPOP(SDNode *N);
475 SDValue visitSELECT(SDNode *N);
476 SDValue visitVSELECT(SDNode *N);
477 SDValue visitVP_SELECT(SDNode *N);
478 SDValue visitSELECT_CC(SDNode *N);
479 SDValue visitSETCC(SDNode *N);
480 SDValue visitSETCCCARRY(SDNode *N);
481 SDValue visitSIGN_EXTEND(SDNode *N);
482 SDValue visitZERO_EXTEND(SDNode *N);
483 SDValue visitANY_EXTEND(SDNode *N);
484 SDValue visitAssertExt(SDNode *N);
485 SDValue visitAssertAlign(SDNode *N);
486 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
487 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
488 SDValue visitTRUNCATE(SDNode *N);
489 SDValue visitBITCAST(SDNode *N);
490 SDValue visitFREEZE(SDNode *N);
491 SDValue visitBUILD_PAIR(SDNode *N);
492 SDValue visitFADD(SDNode *N);
493 SDValue visitVP_FADD(SDNode *N);
494 SDValue visitVP_FSUB(SDNode *N);
495 SDValue visitSTRICT_FADD(SDNode *N);
496 SDValue visitFSUB(SDNode *N);
497 SDValue visitFMUL(SDNode *N);
498 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
499 SDValue visitFMAD(SDNode *N);
500 SDValue visitFDIV(SDNode *N);
501 SDValue visitFREM(SDNode *N);
502 SDValue visitFSQRT(SDNode *N);
503 SDValue visitFCOPYSIGN(SDNode *N);
504 SDValue visitFPOW(SDNode *N);
505 SDValue visitSINT_TO_FP(SDNode *N);
506 SDValue visitUINT_TO_FP(SDNode *N);
507 SDValue visitFP_TO_SINT(SDNode *N);
508 SDValue visitFP_TO_UINT(SDNode *N);
509 SDValue visitXRINT(SDNode *N);
510 SDValue visitFP_ROUND(SDNode *N);
511 SDValue visitFP_EXTEND(SDNode *N);
512 SDValue visitFNEG(SDNode *N);
513 SDValue visitFABS(SDNode *N);
514 SDValue visitFCEIL(SDNode *N);
515 SDValue visitFTRUNC(SDNode *N);
516 SDValue visitFFREXP(SDNode *N);
517 SDValue visitFFLOOR(SDNode *N);
518 SDValue visitFMinMax(SDNode *N);
519 SDValue visitBRCOND(SDNode *N);
520 SDValue visitBR_CC(SDNode *N);
521 SDValue visitLOAD(SDNode *N);
522
523 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
524 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
525 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
526
527 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
528
529 SDValue visitSTORE(SDNode *N);
530 SDValue visitATOMIC_STORE(SDNode *N);
531 SDValue visitLIFETIME_END(SDNode *N);
532 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
533 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
534 SDValue visitBUILD_VECTOR(SDNode *N);
535 SDValue visitCONCAT_VECTORS(SDNode *N);
536 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
537 SDValue visitVECTOR_SHUFFLE(SDNode *N);
538 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
539 SDValue visitINSERT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_COMPRESS(SDNode *N);
541 SDValue visitMLOAD(SDNode *N);
542 SDValue visitMSTORE(SDNode *N);
543 SDValue visitMGATHER(SDNode *N);
544 SDValue visitMSCATTER(SDNode *N);
545 SDValue visitVPGATHER(SDNode *N);
546 SDValue visitVPSCATTER(SDNode *N);
547 SDValue visitVP_STRIDED_LOAD(SDNode *N);
548 SDValue visitVP_STRIDED_STORE(SDNode *N);
549 SDValue visitFP_TO_FP16(SDNode *N);
550 SDValue visitFP16_TO_FP(SDNode *N);
551 SDValue visitFP_TO_BF16(SDNode *N);
552 SDValue visitBF16_TO_FP(SDNode *N);
553 SDValue visitVECREDUCE(SDNode *N);
554 SDValue visitVPOp(SDNode *N);
555 SDValue visitGET_FPENV_MEM(SDNode *N);
556 SDValue visitSET_FPENV_MEM(SDNode *N);
557
558 template <class MatchContextClass>
559 SDValue visitFADDForFMACombine(SDNode *N);
560 template <class MatchContextClass>
561 SDValue visitFSUBForFMACombine(SDNode *N);
562 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
563
564 SDValue XformToShuffleWithZero(SDNode *N);
565 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
566 const SDLoc &DL,
567 SDNode *N,
568 SDValue N0,
569 SDValue N1);
570 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
571 SDValue N1, SDNodeFlags Flags);
572 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
575 EVT VT, SDValue N0, SDValue N1,
576 SDNodeFlags Flags = SDNodeFlags());
577
578 SDValue visitShiftByConstant(SDNode *N);
579
580 SDValue foldSelectOfConstants(SDNode *N);
581 SDValue foldVSelectOfConstants(SDNode *N);
582 SDValue foldBinOpIntoSelect(SDNode *BO);
583 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
584 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
585 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
586 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
587 SDValue N2, SDValue N3, ISD::CondCode CC,
588 bool NotExtCompare = false);
589 SDValue convertSelectOfFPConstantsToLoadOffset(
590 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
591 ISD::CondCode CC);
592 SDValue foldSignChangeInBitcast(SDNode *N);
594 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC);
595 SDValue foldSelectOfBinops(SDNode *N);
596 SDValue foldSextSetcc(SDNode *N);
597 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
598 const SDLoc &DL);
599 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
600 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
601 SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
602 SDValue False, ISD::CondCode CC, const SDLoc &DL);
603 SDValue unfoldMaskedMerge(SDNode *N);
604 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
605 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
606 const SDLoc &DL, bool foldBooleans);
607 SDValue rebuildSetCC(SDValue N);
608
609 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
610 SDValue &CC, bool MatchStrict = false) const;
611 bool isOneUseSetCC(SDValue N) const;
612
613 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
614 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
615
616 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
617 unsigned HiOp);
618 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
619 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
620 const TargetLowering &TLI);
621
622 SDValue CombineExtLoad(SDNode *N);
623 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
624 SDValue combineRepeatedFPDivisors(SDNode *N);
625 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
626 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
628 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
629 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
630 SDValue BuildSDIV(SDNode *N);
631 SDValue BuildSDIVPow2(SDNode *N);
632 SDValue BuildUDIV(SDNode *N);
633 SDValue BuildSREMPow2(SDNode *N);
634 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
635 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
636 bool KnownNeverZero = false,
637 bool InexpensiveOnly = false,
638 std::optional<EVT> OutVT = std::nullopt);
639 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
640 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
642 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
643 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
646 SDNodeFlags Flags, bool Reciprocal);
647 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
648 bool DemandHighBits = true);
649 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
650 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
651 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
652 unsigned PosOpcode, unsigned NegOpcode,
653 const SDLoc &DL);
654 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
655 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
656 unsigned PosOpcode, unsigned NegOpcode,
657 const SDLoc &DL);
658 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
659 SDValue MatchLoadCombine(SDNode *N);
660 SDValue mergeTruncStores(StoreSDNode *N);
661 SDValue reduceLoadWidth(SDNode *N);
662 SDValue ReduceLoadOpStoreWidth(SDNode *N);
664 SDValue TransformFPLoadStorePair(SDNode *N);
665 SDValue convertBuildVecZextToZext(SDNode *N);
666 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
667 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
668 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
669 SDValue reduceBuildVecToShuffle(SDNode *N);
670 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
671 ArrayRef<int> VectorMask, SDValue VecIn1,
672 SDValue VecIn2, unsigned LeftIdx,
673 bool DidSplitVec);
674 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
675
676 /// Walk up chain skipping non-aliasing memory nodes,
677 /// looking for aliasing nodes and adding them to the Aliases vector.
678 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
679 SmallVectorImpl<SDValue> &Aliases);
680
681 /// Return true if there is any possibility that the two addresses overlap.
682 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
683
684 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
685 /// chain (aliasing node.)
686 SDValue FindBetterChain(SDNode *N, SDValue Chain);
687
688 /// Try to replace a store and any possibly adjacent stores on
689 /// consecutive chains with better chains. Return true only if St is
690 /// replaced.
691 ///
692 /// Notice that other chains may still be replaced even if the function
693 /// returns false.
694 bool findBetterNeighborChains(StoreSDNode *St);
695
696 // Helper for findBetterNeighborChains. Walk up the store chain and add
697 // additional chained stores that do not overlap and can be parallelized.
698 bool parallelizeChainedStores(StoreSDNode *St);
699
700 /// Holds a pointer to an LSBaseSDNode as well as information on where it
701 /// is located in a sequence of memory operations connected by a chain.
702 struct MemOpLink {
703 // Ptr to the mem node.
704 LSBaseSDNode *MemNode;
705
706 // Offset from the base ptr.
707 int64_t OffsetFromBase;
708
709 MemOpLink(LSBaseSDNode *N, int64_t Offset)
710 : MemNode(N), OffsetFromBase(Offset) {}
711 };
712
713 // Classify the origin of a stored value.
714 enum class StoreSource { Unknown, Constant, Extract, Load };
715 StoreSource getStoreSource(SDValue StoreVal) {
716 switch (StoreVal.getOpcode()) {
717 case ISD::Constant:
718 case ISD::ConstantFP:
719 return StoreSource::Constant;
720 case ISD::BUILD_VECTOR:
721 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
722 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
723 return StoreSource::Constant;
724 return StoreSource::Unknown;
725 case ISD::EXTRACT_VECTOR_ELT:
726 case ISD::EXTRACT_SUBVECTOR:
727 return StoreSource::Extract;
728 case ISD::LOAD:
729 return StoreSource::Load;
730 default:
731 return StoreSource::Unknown;
732 }
733 }
734
735 /// This is a helper function for visitMUL to check the profitability
736 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
737 /// MulNode is the original multiply, AddNode is (add x, c1),
738 /// and ConstNode is c2.
739 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
740 SDValue ConstNode);
741
742 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
743 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
744 /// the type of the loaded value to be extended.
745 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
746 EVT LoadResultTy, EVT &ExtVT);
747
748 /// Helper function to calculate whether the given Load/Store can have its
749 /// width reduced to ExtVT.
750 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
751 EVT &MemVT, unsigned ShAmt = 0);
752
753 /// Used by BackwardsPropagateMask to find suitable loads.
754 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
755 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
756 ConstantSDNode *Mask, SDNode *&NodeToMask);
757 /// Attempt to propagate a given AND node back to load leaves so that they
758 /// can be combined into narrow loads.
759 bool BackwardsPropagateMask(SDNode *N);
760
761 /// Helper function for mergeConsecutiveStores which merges the component
762 /// store chains.
763 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
764 unsigned NumStores);
765
766 /// Helper function for mergeConsecutiveStores which checks if all the store
767 /// nodes have the same underlying object. We can still reuse the first
768 /// store's pointer info if all the stores are from the same object.
769 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
770
771 /// This is a helper function for mergeConsecutiveStores. When the source
772 /// elements of the consecutive stores are all constants or all extracted
773 /// vector elements, try to merge them into one larger store introducing
774 /// bitcasts if necessary. \return True if a merged store was created.
775 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
776 EVT MemVT, unsigned NumStores,
777 bool IsConstantSrc, bool UseVector,
778 bool UseTrunc);
779
780 /// This is a helper function for mergeConsecutiveStores. Stores that
781 /// potentially may be merged with St are placed in StoreNodes. RootNode is
782 /// a chain predecessor to all store candidates.
783 void getStoreMergeCandidates(StoreSDNode *St,
784 SmallVectorImpl<MemOpLink> &StoreNodes,
785 SDNode *&Root);
786
787 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
788 /// have indirect dependency through their operands. RootNode is the
789 /// predecessor to all stores calculated by getStoreMergeCandidates and is
790 /// used to prune the dependency check. \return True if safe to merge.
791 bool checkMergeStoreCandidatesForDependencies(
792 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
793 SDNode *RootNode);
794
795 /// This is a helper function for mergeConsecutiveStores. Given a list of
796 /// store candidates, find the first N that are consecutive in memory.
797 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
798 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
799 int64_t ElementSizeBytes) const;
800
801 /// This is a helper function for mergeConsecutiveStores. It is used for
802 /// store chains that are composed entirely of constant values.
803 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
804 unsigned NumConsecutiveStores,
805 EVT MemVT, SDNode *Root, bool AllowVectors);
806
807 /// This is a helper function for mergeConsecutiveStores. It is used for
808 /// store chains that are composed entirely of extracted vector elements.
809 /// When extracting multiple vector elements, try to store them in one
810 /// vector store rather than a sequence of scalar stores.
811 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
812 unsigned NumConsecutiveStores, EVT MemVT,
813 SDNode *Root);
814
815 /// This is a helper function for mergeConsecutiveStores. It is used for
816 /// store chains that are composed entirely of loaded values.
817 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
818 unsigned NumConsecutiveStores, EVT MemVT,
819 SDNode *Root, bool AllowVectors,
820 bool IsNonTemporalStore, bool IsNonTemporalLoad);
821
822 /// Merge consecutive store operations into a wide store.
823 /// This optimization uses wide integers or vectors when possible.
824 /// \return true if stores were merged.
825 bool mergeConsecutiveStores(StoreSDNode *St);
826
827 /// Try to transform a truncation where C is a constant:
828 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
829 ///
830 /// \p N needs to be a truncation and its first operand an AND. Other
831 /// requirements are checked by the function (e.g. that trunc is
832 /// single-use); if they are not met, an empty SDValue is returned.
833 SDValue distributeTruncateThroughAnd(SDNode *N);
834
835 /// Helper function to determine whether the target supports operation
836 /// given by \p Opcode for type \p VT, that is, whether the operation
837 /// is legal or custom before legalizing operations, and whether it is
838 /// legal (but not custom) after legalization.
839 bool hasOperation(unsigned Opcode, EVT VT) {
840 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
841 }
842
843 public:
844 /// Runs the dag combiner on all nodes in the work list
845 void Run(CombineLevel AtLevel);
846
847 SelectionDAG &getDAG() const { return DAG; }
848
849 /// Convenience wrapper around TargetLowering::getShiftAmountTy.
850 EVT getShiftAmountTy(EVT LHSTy) {
851 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout());
852 }
853
854 /// This method returns true if we are running before type legalization or
855 /// if the specified VT is legal.
856 bool isTypeLegal(const EVT &VT) {
857 if (!LegalTypes) return true;
858 return TLI.isTypeLegal(VT);
859 }
860
861 /// Convenience wrapper around TargetLowering::getSetCCResultType
862 EVT getSetCCResultType(EVT VT) const {
863 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
864 }
865
866 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
867 SDValue OrigLoad, SDValue ExtLoad,
868 ISD::NodeType ExtType);
869 };
870
871/// This class is a DAGUpdateListener that removes any deleted
872/// nodes from the worklist.
873class WorklistRemover : public SelectionDAG::DAGUpdateListener {
874 DAGCombiner &DC;
875
876public:
877 explicit WorklistRemover(DAGCombiner &dc)
878 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
879
880 void NodeDeleted(SDNode *N, SDNode *E) override {
881 DC.removeFromWorklist(N);
882 }
883};
884
885class WorklistInserter : public SelectionDAG::DAGUpdateListener {
886 DAGCombiner &DC;
887
888public:
889 explicit WorklistInserter(DAGCombiner &dc)
890 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
891
892 // FIXME: Ideally we could add N to the worklist, but this causes exponential
893 // compile time costs in large DAGs, e.g. Halide.
894 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
895};
896
897} // end anonymous namespace
898
899//===----------------------------------------------------------------------===//
900// TargetLowering::DAGCombinerInfo implementation
901//===----------------------------------------------------------------------===//
902
903void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
904 ((DAGCombiner*)DC)->AddToWorklist(N);
905}
906
907SDValue TargetLowering::DAGCombinerInfo::
908CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
909 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
910}
911
912SDValue TargetLowering::DAGCombinerInfo::
913CombineTo(SDNode *N, SDValue Res, bool AddTo) {
914 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
915}
916
917SDValue TargetLowering::DAGCombinerInfo::
918CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
919 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
920}
921
922bool TargetLowering::DAGCombinerInfo::
923recursivelyDeleteUnusedNodes(SDNode *N) {
924 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
925}
926
927void TargetLowering::DAGCombinerInfo::
928CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
929 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
930}
931
932//===----------------------------------------------------------------------===//
933// Helper Functions
934//===----------------------------------------------------------------------===//
935
936void DAGCombiner::deleteAndRecombine(SDNode *N) {
937 removeFromWorklist(N);
938
939 // If the operands of this node are only used by the node, they will now be
940 // dead. Make sure to re-visit them and recursively delete dead nodes.
941 for (const SDValue &Op : N->ops())
942 // For an operand generating multiple values, one of the values may
943 // become dead allowing further simplification (e.g. split index
944 // arithmetic from an indexed load).
945 if (Op->hasOneUse() || Op->getNumValues() > 1)
946 AddToWorklist(Op.getNode());
947
948 DAG.DeleteNode(N);
949}
950
951// APInts must be the same size for most operations, this helper
952// function zero extends the shorter of the pair so that they match.
953// We provide an Offset so that we can create bitwidths that won't overflow.
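// For example, an 8-bit LHS and a 16-bit RHS are both zero-extended to
// 16 (+ Offset) bits before use.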
954static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
955 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
956 LHS = LHS.zext(Bits);
957 RHS = RHS.zext(Bits);
958}
959
960// Return true if this node is a setcc, or is a select_cc
961// that selects between the target values used for true and false, making it
962// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
963// the appropriate nodes based on the type of node we are checking. This
964// simplifies life a bit for the callers.
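// For example, (select_cc lhs, rhs, T, F, cc) is treated like
// (setcc lhs, rhs, cc) when T and F are the target's canonical true and false
// values.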
965bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
966 SDValue &CC, bool MatchStrict) const {
967 if (N.getOpcode() == ISD::SETCC) {
968 LHS = N.getOperand(0);
969 RHS = N.getOperand(1);
970 CC = N.getOperand(2);
971 return true;
972 }
973
974 if (MatchStrict &&
975 (N.getOpcode() == ISD::STRICT_FSETCC ||
976 N.getOpcode() == ISD::STRICT_FSETCCS)) {
977 LHS = N.getOperand(1);
978 RHS = N.getOperand(2);
979 CC = N.getOperand(3);
980 return true;
981 }
982
983 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
984 !TLI.isConstFalseVal(N.getOperand(3)))
985 return false;
986
987 if (TLI.getBooleanContents(N.getValueType()) ==
989 return false;
990
991 LHS = N.getOperand(0);
992 RHS = N.getOperand(1);
993 CC = N.getOperand(4);
994 return true;
995}
996
997/// Return true if this is a SetCC-equivalent operation with only one use.
998/// If this is true, it allows the users to invert the operation for free when
999/// it is profitable to do so.
1000bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1001 SDValue N0, N1, N2;
1002 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1003 return true;
1004 return false;
1005}
1006
1007static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1008 if (!ScalarTy.isSimple())
1009 return false;
1010
1011 uint64_t MaskForTy = 0ULL;
1012 switch (ScalarTy.getSimpleVT().SimpleTy) {
1013 case MVT::i8:
1014 MaskForTy = 0xFFULL;
1015 break;
1016 case MVT::i16:
1017 MaskForTy = 0xFFFFULL;
1018 break;
1019 case MVT::i32:
1020 MaskForTy = 0xFFFFFFFFULL;
1021 break;
1022 default:
1023 return false;
1024 break;
1025 }
1026
1027 APInt Val;
1028 if (ISD::isConstantSplatVector(N, Val))
1029 return Val.getLimitedValue() == MaskForTy;
1030
1031 return false;
1032}
1033
1034// Determines if it is a constant integer or a splat/build vector of constant
1035// integers (and undefs).
1036// Do not permit build vector implicit truncation.
1037static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1038 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1039 return !(Const->isOpaque() && NoOpaques);
1040 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1041 return false;
1042 unsigned BitWidth = N.getScalarValueSizeInBits();
1043 for (const SDValue &Op : N->op_values()) {
1044 if (Op.isUndef())
1045 continue;
1046 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1047 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1048 (Const->isOpaque() && NoOpaques))
1049 return false;
1050 }
1051 return true;
1052}
1053
1054// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1055// undef's.
1056static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1057 if (V.getOpcode() != ISD::BUILD_VECTOR)
1058 return false;
1059 return isConstantOrConstantVector(V, NoOpaques) ||
1060 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1061 }
1062
1063 // Determine whether this indexed load's index can be split out, i.e. splitting is enabled and the index is not an opaque target constant.
1064static bool canSplitIdx(LoadSDNode *LD) {
1065 return MaySplitLoadIndex &&
1066 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1067 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1068}
1069
1070bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1071 const SDLoc &DL,
1072 SDNode *N,
1073 SDValue N0,
1074 SDValue N1) {
1075 // Currently this only tries to ensure we don't undo the GEP splits done by
1076 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1077 // we check if the following transformation would be problematic:
1078 // (load/store (add, (add, x, offset1), offset2)) ->
1079 // (load/store (add, x, offset1+offset2)).
1080
1081 // (load/store (add, (add, x, y), offset2)) ->
1082 // (load/store (add, (add, x, offset2), y)).
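// i.e. we conservatively report that reassociation would break the pattern
// whenever the merged offset would no longer fold into the addressing mode of
// the load/store users.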
1083
1084 if (N0.getOpcode() != ISD::ADD)
1085 return false;
1086
1087 // Check for vscale addressing modes.
1088 // (load/store (add/sub (add x, y), vscale))
1089 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1090 // (load/store (add/sub (add x, y), (mul vscale, C)))
1091 if ((N1.getOpcode() == ISD::VSCALE ||
1092 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1093 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1094 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1095 N1.getValueType().getFixedSizeInBits() <= 64) {
1096 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1097 ? N1.getConstantOperandVal(0)
1098 : (N1.getOperand(0).getConstantOperandVal(0) *
1099 (N1.getOpcode() == ISD::SHL
1100 ? (1LL << N1.getConstantOperandVal(1))
1101 : N1.getConstantOperandVal(1)));
1102 if (Opc == ISD::SUB)
1103 ScalableOffset = -ScalableOffset;
1104 if (all_of(N->uses(), [&](SDNode *Node) {
1105 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1106 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1107 TargetLoweringBase::AddrMode AM;
1108 AM.HasBaseReg = true;
1109 AM.ScalableOffset = ScalableOffset;
1110 EVT VT = LoadStore->getMemoryVT();
1111 unsigned AS = LoadStore->getAddressSpace();
1112 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1113 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1114 AS);
1115 }
1116 return false;
1117 }))
1118 return true;
1119 }
1120
1121 if (Opc != ISD::ADD)
1122 return false;
1123
1124 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1125 if (!C2)
1126 return false;
1127
1128 const APInt &C2APIntVal = C2->getAPIntValue();
1129 if (C2APIntVal.getSignificantBits() > 64)
1130 return false;
1131
1132 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1133 if (N0.hasOneUse())
1134 return false;
1135
1136 const APInt &C1APIntVal = C1->getAPIntValue();
1137 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1138 if (CombinedValueIntVal.getSignificantBits() > 64)
1139 return false;
1140 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1141
1142 for (SDNode *Node : N->uses()) {
1143 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1144 // Is x[offset2] already not a legal addressing mode? If so then
1145 // reassociating the constants breaks nothing (we test offset2 because
1146 // that's the one we hope to fold into the load or store).
1147 TargetLoweringBase::AddrMode AM;
1148 AM.HasBaseReg = true;
1149 AM.BaseOffs = C2APIntVal.getSExtValue();
1150 EVT VT = LoadStore->getMemoryVT();
1151 unsigned AS = LoadStore->getAddressSpace();
1152 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1153 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1154 continue;
1155
1156 // Would x[offset1+offset2] still be a legal addressing mode?
1157 AM.BaseOffs = CombinedValue;
1158 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1159 return true;
1160 }
1161 }
1162 } else {
1163 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1164 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1165 return false;
1166
1167 for (SDNode *Node : N->uses()) {
1168 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1169 if (!LoadStore)
1170 return false;
1171
1172 // Is x[offset2] a legal addressing mode? If so then
1173 // reassociating the constants breaks address pattern
1174 TargetLoweringBase::AddrMode AM;
1175 AM.HasBaseReg = true;
1176 AM.BaseOffs = C2APIntVal.getSExtValue();
1177 EVT VT = LoadStore->getMemoryVT();
1178 unsigned AS = LoadStore->getAddressSpace();
1179 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1180 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1181 return false;
1182 }
1183 return true;
1184 }
1185
1186 return false;
1187}
1188
1189/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1190/// \p N0 is the same kind of operation as \p Opc.
1191SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1192 SDValue N0, SDValue N1,
1193 SDNodeFlags Flags) {
1194 EVT VT = N0.getValueType();
1195
1196 if (N0.getOpcode() != Opc)
1197 return SDValue();
1198
1199 SDValue N00 = N0.getOperand(0);
1200 SDValue N01 = N0.getOperand(1);
1201
1202 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1203 SDNodeFlags NewFlags;
1204 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1205 Flags.hasNoUnsignedWrap())
1206 NewFlags.setNoUnsignedWrap(true);
1207
1209 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1210 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1211 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1212 return SDValue();
1213 }
1214 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1215 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1216 // iff (op x, c1) has one use
1217 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1218 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1219 }
1220 }
1221
1222 // Check for repeated operand logic simplifications.
1223 if (Opc == ISD::AND || Opc == ISD::OR) {
1224 // (N00 & N01) & N00 --> N00 & N01
1225 // (N00 & N01) & N01 --> N00 & N01
1226 // (N00 | N01) | N00 --> N00 | N01
1227 // (N00 | N01) | N01 --> N00 | N01
1228 if (N1 == N00 || N1 == N01)
1229 return N0;
1230 }
1231 if (Opc == ISD::XOR) {
1232 // (N00 ^ N01) ^ N00 --> N01
1233 if (N1 == N00)
1234 return N01;
1235 // (N00 ^ N01) ^ N01 --> N00
1236 if (N1 == N01)
1237 return N00;
1238 }
1239
1240 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1241 if (N1 != N01) {
1242 // Reassociate if (op N00, N1) already exists
1243 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1244 // If (op (op N00, N1), N01) already exists, stop reassociating here
1245 // to avoid an infinite loop.
1246 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1247 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1248 }
1249 }
1250
1251 if (N1 != N00) {
1252 // Reassociate if (op N01, N1) already exists
1253 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1254 // If (op (op N01, N1), N00) already exists, stop reassociating here
1255 // to avoid an infinite loop.
1256 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1257 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1258 }
1259 }
1260
1261 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1262 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1263 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1264 // comparisons with the same predicate. This enables optimizations as the
1265 // following one:
1266 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1267 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1268 if (Opc == ISD::AND || Opc == ISD::OR) {
1269 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1270 N01->getOpcode() == ISD::SETCC) {
1271 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1272 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1273 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1274 if (CC1 == CC00 && CC1 != CC01) {
1275 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1276 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1277 }
1278 if (CC1 == CC01 && CC1 != CC00) {
1279 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1280 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1281 }
1282 }
1283 }
1284 }
1285
1286 return SDValue();
1287}
1288
1289/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1290/// same kind of operation as \p Opc.
1291SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1292 SDValue N1, SDNodeFlags Flags) {
1293 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1294
1295 // Floating-point reassociation is not allowed without loose FP math.
1296 if (N0.getValueType().isFloatingPoint() ||
1297 N1.getValueType().isFloatingPoint())
1298 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1299 return SDValue();
1300
1301 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1302 return Combined;
1303 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1304 return Combined;
1305 return SDValue();
1306}
1307
1308// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1309// Note that we only expect Flags to be passed from FP operations. For integer
1310// operations they need to be dropped.
1311SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1312 const SDLoc &DL, EVT VT, SDValue N0,
1313 SDValue N1, SDNodeFlags Flags) {
1314 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1315 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1316 N0->hasOneUse() && N1->hasOneUse() &&
1318 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1319 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1320 return DAG.getNode(RedOpc, DL, VT,
1321 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1322 N0.getOperand(0), N1.getOperand(0)));
1323 }
1324 return SDValue();
1325}
1326
1327SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1328 bool AddTo) {
1329 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1330 ++NodesCombined;
1331 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1332 To[0].dump(&DAG);
1333 dbgs() << " and " << NumTo - 1 << " other values\n");
1334 for (unsigned i = 0, e = NumTo; i != e; ++i)
1335 assert((!To[i].getNode() ||
1336 N->getValueType(i) == To[i].getValueType()) &&
1337 "Cannot combine value to value of different type!");
1338
1339 WorklistRemover DeadNodes(*this);
1340 DAG.ReplaceAllUsesWith(N, To);
1341 if (AddTo) {
1342 // Push the new nodes and any users onto the worklist
1343 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1344 if (To[i].getNode())
1345 AddToWorklistWithUsers(To[i].getNode());
1346 }
1347 }
1348
1349 // Finally, if the node is now dead, remove it from the graph. The node
1350 // may not be dead if the replacement process recursively simplified to
1351 // something else needing this node.
1352 if (N->use_empty())
1353 deleteAndRecombine(N);
1354 return SDValue(N, 0);
1355}
1356
1357void DAGCombiner::
1358CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1359 // Replace the old value with the new one.
1360 ++NodesCombined;
1361 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1362 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1363
1364 // Replace all uses.
1365 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1366
1367 // Push the new node and any (possibly new) users onto the worklist.
1368 AddToWorklistWithUsers(TLO.New.getNode());
1369
1370 // Finally, if the node is now dead, remove it from the graph.
1371 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1372}
1373
1374/// Check the specified integer node value to see if it can be simplified or if
1375/// things it uses can be simplified by bit propagation. If so, return true.
1376bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1377 const APInt &DemandedElts,
1378 bool AssumeSingleUse) {
1379 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1380 KnownBits Known;
1381 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1382 AssumeSingleUse))
1383 return false;
1384
1385 // Revisit the node.
1386 AddToWorklist(Op.getNode());
1387
1388 CommitTargetLoweringOpt(TLO);
1389 return true;
1390}
1391
1392/// Check the specified vector node value to see if it can be simplified or
1393/// if things it uses can be simplified as it only uses some of the elements.
1394/// If so, return true.
1395bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1396 const APInt &DemandedElts,
1397 bool AssumeSingleUse) {
1398 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1399 APInt KnownUndef, KnownZero;
1400 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1401 TLO, 0, AssumeSingleUse))
1402 return false;
1403
1404 // Revisit the node.
1405 AddToWorklist(Op.getNode());
1406
1407 CommitTargetLoweringOpt(TLO);
1408 return true;
1409}
1410
1411void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1412 SDLoc DL(Load);
1413 EVT VT = Load->getValueType(0);
1414 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1415
1416 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1417 Trunc.dump(&DAG); dbgs() << '\n');
1418
1419 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1420 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1421
1422 AddToWorklist(Trunc.getNode());
1423 recursivelyDeleteUnusedNodes(Load);
1424}
1425
1426SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1427 Replace = false;
1428 SDLoc DL(Op);
1429 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1430 LoadSDNode *LD = cast<LoadSDNode>(Op);
1431 EVT MemVT = LD->getMemoryVT();
1432 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1433 : LD->getExtensionType();
1434 Replace = true;
1435 return DAG.getExtLoad(ExtType, DL, PVT,
1436 LD->getChain(), LD->getBasePtr(),
1437 MemVT, LD->getMemOperand());
1438 }
1439
1440 unsigned Opc = Op.getOpcode();
1441 switch (Opc) {
1442 default: break;
1443 case ISD::AssertSext:
1444 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1445 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1446 break;
1447 case ISD::AssertZext:
1448 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1449 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1450 break;
1451 case ISD::Constant: {
1452 unsigned ExtOpc =
1453 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1454 return DAG.getNode(ExtOpc, DL, PVT, Op);
1455 }
1456 }
1457
1458 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1459 return SDValue();
1460 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1461}
1462
1463SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1464 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1465 return SDValue();
1466 EVT OldVT = Op.getValueType();
1467 SDLoc DL(Op);
1468 bool Replace = false;
1469 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1470 if (!NewOp.getNode())
1471 return SDValue();
1472 AddToWorklist(NewOp.getNode());
1473
1474 if (Replace)
1475 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1476 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1477 DAG.getValueType(OldVT));
1478}
1479
1480SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1481 EVT OldVT = Op.getValueType();
1482 SDLoc DL(Op);
1483 bool Replace = false;
1484 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1485 if (!NewOp.getNode())
1486 return SDValue();
1487 AddToWorklist(NewOp.getNode());
1488
1489 if (Replace)
1490 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1491 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1492}
1493
1494/// Promote the specified integer binary operation if the target indicates it is
1495/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1496/// i32 since i16 instructions are longer.
1497SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1498 if (!LegalOperations)
1499 return SDValue();
1500
1501 EVT VT = Op.getValueType();
1502 if (VT.isVector() || !VT.isInteger())
1503 return SDValue();
1504
1505 // If operation type is 'undesirable', e.g. i16 on x86, consider
1506 // promoting it.
1507 unsigned Opc = Op.getOpcode();
1508 if (TLI.isTypeDesirableForOp(Opc, VT))
1509 return SDValue();
1510
1511 EVT PVT = VT;
1512 // Consult target whether it is a good idea to promote this operation and
1513 // what's the right type to promote it to.
1514 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1515 assert(PVT != VT && "Don't know what type to promote to!");
1516
1517 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1518
1519 bool Replace0 = false;
1520 SDValue N0 = Op.getOperand(0);
1521 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1522
1523 bool Replace1 = false;
1524 SDValue N1 = Op.getOperand(1);
1525 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1526 SDLoc DL(Op);
1527
1528 SDValue RV =
1529 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1530
1531 // We are always replacing N0/N1's use in N and only need additional
1532 // replacements if there are additional uses.
1533 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1534 // (SDValue) here because the node may reference multiple values
1535 // (for example, the chain value of a load node).
1536 Replace0 &= !N0->hasOneUse();
1537 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1538
1539 // Combine Op here so it is preserved past replacements.
1540 CombineTo(Op.getNode(), RV);
1541
1542 // If operands have a use ordering, make sure we deal with
1543 // predecessor first.
1544 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1545 std::swap(N0, N1);
1546 std::swap(NN0, NN1);
1547 }
1548
1549 if (Replace0) {
1550 AddToWorklist(NN0.getNode());
1551 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1552 }
1553 if (Replace1) {
1554 AddToWorklist(NN1.getNode());
1555 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1556 }
1557 return Op;
1558 }
1559 return SDValue();
1560}
1561
1562/// Promote the specified integer shift operation if the target indicates it is
1563/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1564/// i32 since i16 instructions are longer.
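// For illustration (assuming PVT is i32 for an undesirable i16 type): the
// promoted source must preserve the bits the shift brings in, so roughly
//   (i16 sra x, c) --> (i16 truncate (i32 sra (sign_extend_inreg x', i16), c))
//   (i16 srl x, c) --> (i16 truncate (i32 srl (and x', 0xffff), c))
// where x' is the any-extended operand; SHL needs no such fix-up.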
1565SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1566 if (!LegalOperations)
1567 return SDValue();
1568
1569 EVT VT = Op.getValueType();
1570 if (VT.isVector() || !VT.isInteger())
1571 return SDValue();
1572
1573 // If operation type is 'undesirable', e.g. i16 on x86, consider
1574 // promoting it.
1575 unsigned Opc = Op.getOpcode();
1576 if (TLI.isTypeDesirableForOp(Opc, VT))
1577 return SDValue();
1578
1579 EVT PVT = VT;
1580 // Consult target whether it is a good idea to promote this operation and
1581 // what's the right type to promote it to.
1582 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1583 assert(PVT != VT && "Don't know what type to promote to!");
1584
1585 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1586
1587 bool Replace = false;
1588 SDValue N0 = Op.getOperand(0);
1589 if (Opc == ISD::SRA)
1590 N0 = SExtPromoteOperand(N0, PVT);
1591 else if (Opc == ISD::SRL)
1592 N0 = ZExtPromoteOperand(N0, PVT);
1593 else
1594 N0 = PromoteOperand(N0, PVT, Replace);
1595
1596 if (!N0.getNode())
1597 return SDValue();
1598
1599 SDLoc DL(Op);
1600 SDValue N1 = Op.getOperand(1);
1601 SDValue RV =
1602 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1603
1604 if (Replace)
1605 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1606
1607 // Deal with Op being deleted.
1608 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1609 return RV;
1610 }
1611 return SDValue();
1612}
1613
1614SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1615 if (!LegalOperations)
1616 return SDValue();
1617
1618 EVT VT = Op.getValueType();
1619 if (VT.isVector() || !VT.isInteger())
1620 return SDValue();
1621
1622 // If operation type is 'undesirable', e.g. i16 on x86, consider
1623 // promoting it.
1624 unsigned Opc = Op.getOpcode();
1625 if (TLI.isTypeDesirableForOp(Opc, VT))
1626 return SDValue();
1627
1628 EVT PVT = VT;
1629 // Consult target whether it is a good idea to promote this operation and
1630 // what's the right type to promote it to.
1631 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1632 assert(PVT != VT && "Don't know what type to promote to!");
1633 // fold (aext (aext x)) -> (aext x)
1634 // fold (aext (zext x)) -> (zext x)
1635 // fold (aext (sext x)) -> (sext x)
1636 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1637 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1638 }
1639 return SDValue();
1640}
1641
1642bool DAGCombiner::PromoteLoad(SDValue Op) {
1643 if (!LegalOperations)
1644 return false;
1645
1646 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1647 return false;
1648
1649 EVT VT = Op.getValueType();
1650 if (VT.isVector() || !VT.isInteger())
1651 return false;
1652
1653 // If operation type is 'undesirable', e.g. i16 on x86, consider
1654 // promoting it.
1655 unsigned Opc = Op.getOpcode();
1656 if (TLI.isTypeDesirableForOp(Opc, VT))
1657 return false;
1658
1659 EVT PVT = VT;
1660 // Consult target whether it is a good idea to promote this operation and
1661 // what's the right type to promote it to.
1662 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1663 assert(PVT != VT && "Don't know what type to promote to!");
1664
1665 SDLoc DL(Op);
1666 SDNode *N = Op.getNode();
1667 LoadSDNode *LD = cast<LoadSDNode>(N);
1668 EVT MemVT = LD->getMemoryVT();
1669 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1670 : LD->getExtensionType();
1671 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1672 LD->getChain(), LD->getBasePtr(),
1673 MemVT, LD->getMemOperand());
1674 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1675
1676 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1677 Result.dump(&DAG); dbgs() << '\n');
1678
1679 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1680 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1681
1682 AddToWorklist(Result.getNode());
1683 recursivelyDeleteUnusedNodes(N);
1684 return true;
1685 }
1686
1687 return false;
1688}
1689
1690/// Recursively delete a node which has no uses and any operands for
1691/// which it is the only use.
1692///
1693/// Note that this both deletes the nodes and removes them from the worklist.
1694 /// It also adds any nodes that have had a user deleted to the worklist, as they
1695 /// may now have only one use and be subject to other combines.
1696bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1697 if (!N->use_empty())
1698 return false;
1699
1700 SmallSetVector<SDNode *, 16> Nodes;
1701 Nodes.insert(N);
1702 do {
1703 N = Nodes.pop_back_val();
1704 if (!N)
1705 continue;
1706
1707 if (N->use_empty()) {
1708 for (const SDValue &ChildN : N->op_values())
1709 Nodes.insert(ChildN.getNode());
1710
1711 removeFromWorklist(N);
1712 DAG.DeleteNode(N);
1713 } else {
1714 AddToWorklist(N);
1715 }
1716 } while (!Nodes.empty());
1717 return true;
1718}
1719
1720//===----------------------------------------------------------------------===//
1721// Main DAG Combiner implementation
1722//===----------------------------------------------------------------------===//
1723
1724void DAGCombiner::Run(CombineLevel AtLevel) {
1725 // Set the instance variables so that the various visit routines may use them.
1726 Level = AtLevel;
1727 LegalDAG = Level >= AfterLegalizeDAG;
1728 LegalOperations = Level >= AfterLegalizeVectorOps;
1729 LegalTypes = Level >= AfterLegalizeTypes;
1730
1731 WorklistInserter AddNodes(*this);
1732
1733 // Add all the dag nodes to the worklist.
1734 //
1735 // Note: Not all nodes are added to the PruningList here. This is because the
1736 // only nodes which can be deleted are those which have no uses, and all other nodes
1737 // which would otherwise be added to the worklist by the first call to
1738 // getNextWorklistEntry are already present in it.
1739 for (SDNode &Node : DAG.allnodes())
1740 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1741
1742 // Create a dummy node (which is not added to allnodes), that adds a reference
1743 // to the root node, preventing it from being deleted, and tracking any
1744 // changes of the root.
1745 HandleSDNode Dummy(DAG.getRoot());
1746
1747 // While we have a valid worklist entry node, try to combine it.
1748 while (SDNode *N = getNextWorklistEntry()) {
1749 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1750 // N is deleted from the DAG, since they too may now be dead or may have a
1751 // reduced number of uses, allowing other xforms.
1752 if (recursivelyDeleteUnusedNodes(N))
1753 continue;
1754
1755 WorklistRemover DeadNodes(*this);
1756
1757 // If this combine is running after legalizing the DAG, re-legalize any
1758 // nodes pulled off the worklist.
1759 if (LegalDAG) {
1760 SmallSetVector<SDNode *, 16> UpdatedNodes;
1761 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1762
1763 for (SDNode *LN : UpdatedNodes)
1764 AddToWorklistWithUsers(LN);
1765
1766 if (!NIsValid)
1767 continue;
1768 }
1769
1770 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1771
1772 // Add any operands of the new node which have not yet been combined to the
1773 // worklist as well. getNextWorklistEntry flags nodes that have been
1774 // combined before. Because the worklist uniques things already, this won't
1775 // repeatedly process the same operand.
1776 for (const SDValue &ChildN : N->op_values())
1777 AddToWorklist(ChildN.getNode(), /*IsCandidateForPruning=*/true,
1778 /*SkipIfCombinedBefore=*/true);
1779
1780 SDValue RV = combine(N);
1781
1782 if (!RV.getNode())
1783 continue;
1784
1785 ++NodesCombined;
1786
1787 // If we get back the same node we passed in, rather than a new node or
1788 // zero, we know that the node must have defined multiple values and
1789 // CombineTo was used. Since CombineTo takes care of the worklist
1790 // mechanics for us, we have no work to do in this case.
1791 if (RV.getNode() == N)
1792 continue;
1793
1794 assert(N->getOpcode() != ISD::DELETED_NODE &&
1795 RV.getOpcode() != ISD::DELETED_NODE &&
1796 "Node was deleted but visit returned new node!");
1797
1798 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1799
1800 if (N->getNumValues() == RV->getNumValues())
1801 DAG.ReplaceAllUsesWith(N, RV.getNode());
1802 else {
1803 assert(N->getValueType(0) == RV.getValueType() &&
1804 N->getNumValues() == 1 && "Type mismatch");
1805 DAG.ReplaceAllUsesWith(N, &RV);
1806 }
1807
1808 // Push the new node and any users onto the worklist. Omit this if the
1809 // new node is the EntryToken (e.g. if a store managed to get optimized
1810 // out), because re-visiting the EntryToken and its users will not uncover
1811 // any additional opportunities, but there may be a large number of such
1812 // users, potentially causing compile time explosion.
1813 if (RV.getOpcode() != ISD::EntryToken)
1814 AddToWorklistWithUsers(RV.getNode());
1815
1816 // Finally, if the node is now dead, remove it from the graph. The node
1817 // may not be dead if the replacement process recursively simplified to
1818 // something else needing this node. This will also take care of adding any
1819 // operands which have lost a user to the worklist.
1820 recursivelyDeleteUnusedNodes(N);
1821 }
1822
1823 // If the root changed (e.g. it was a dead load), update the root.
1824 DAG.setRoot(Dummy.getValue());
1825 DAG.RemoveDeadNodes();
1826}
1827
1828SDValue DAGCombiner::visit(SDNode *N) {
1829 // clang-format off
1830 switch (N->getOpcode()) {
1831 default: break;
1832 case ISD::TokenFactor: return visitTokenFactor(N);
1833 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1834 case ISD::ADD: return visitADD(N);
1835 case ISD::SUB: return visitSUB(N);
1836 case ISD::SADDSAT:
1837 case ISD::UADDSAT: return visitADDSAT(N);
1838 case ISD::SSUBSAT:
1839 case ISD::USUBSAT: return visitSUBSAT(N);
1840 case ISD::ADDC: return visitADDC(N);
1841 case ISD::SADDO:
1842 case ISD::UADDO: return visitADDO(N);
1843 case ISD::SUBC: return visitSUBC(N);
1844 case ISD::SSUBO:
1845 case ISD::USUBO: return visitSUBO(N);
1846 case ISD::ADDE: return visitADDE(N);
1847 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1848 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1849 case ISD::SUBE: return visitSUBE(N);
1850 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1851 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1852 case ISD::SMULFIX:
1853 case ISD::SMULFIXSAT:
1854 case ISD::UMULFIX:
1855 case ISD::UMULFIXSAT: return visitMULFIX(N);
1856 case ISD::MUL: return visitMUL<EmptyMatchContext>(N);
1857 case ISD::SDIV: return visitSDIV(N);
1858 case ISD::UDIV: return visitUDIV(N);
1859 case ISD::SREM:
1860 case ISD::UREM: return visitREM(N);
1861 case ISD::MULHU: return visitMULHU(N);
1862 case ISD::MULHS: return visitMULHS(N);
1863 case ISD::AVGFLOORS:
1864 case ISD::AVGFLOORU:
1865 case ISD::AVGCEILS:
1866 case ISD::AVGCEILU: return visitAVG(N);
1867 case ISD::ABDS:
1868 case ISD::ABDU: return visitABD(N);
1869 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1870 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1871 case ISD::SMULO:
1872 case ISD::UMULO: return visitMULO(N);
1873 case ISD::SMIN:
1874 case ISD::SMAX:
1875 case ISD::UMIN:
1876 case ISD::UMAX: return visitIMINMAX(N);
1877 case ISD::AND: return visitAND(N);
1878 case ISD::OR: return visitOR(N);
1879 case ISD::XOR: return visitXOR(N);
1880 case ISD::SHL: return visitSHL(N);
1881 case ISD::SRA: return visitSRA(N);
1882 case ISD::SRL: return visitSRL(N);
1883 case ISD::ROTR:
1884 case ISD::ROTL: return visitRotate(N);
1885 case ISD::FSHL:
1886 case ISD::FSHR: return visitFunnelShift(N);
1887 case ISD::SSHLSAT:
1888 case ISD::USHLSAT: return visitSHLSAT(N);
1889 case ISD::ABS: return visitABS(N);
1890 case ISD::BSWAP: return visitBSWAP(N);
1891 case ISD::BITREVERSE: return visitBITREVERSE(N);
1892 case ISD::CTLZ: return visitCTLZ(N);
1893 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1894 case ISD::CTTZ: return visitCTTZ(N);
1895 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1896 case ISD::CTPOP: return visitCTPOP(N);
1897 case ISD::SELECT: return visitSELECT(N);
1898 case ISD::VSELECT: return visitVSELECT(N);
1899 case ISD::SELECT_CC: return visitSELECT_CC(N);
1900 case ISD::SETCC: return visitSETCC(N);
1901 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1902 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1903 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1904 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1905 case ISD::AssertSext:
1906 case ISD::AssertZext: return visitAssertExt(N);
1907 case ISD::AssertAlign: return visitAssertAlign(N);
1908 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1909 case ISD::SIGN_EXTEND_VECTOR_INREG:
1910 case ISD::ZERO_EXTEND_VECTOR_INREG:
1911 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1912 case ISD::TRUNCATE: return visitTRUNCATE(N);
1913 case ISD::BITCAST: return visitBITCAST(N);
1914 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1915 case ISD::FADD: return visitFADD(N);
1916 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1917 case ISD::FSUB: return visitFSUB(N);
1918 case ISD::FMUL: return visitFMUL(N);
1919 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1920 case ISD::FMAD: return visitFMAD(N);
1921 case ISD::FDIV: return visitFDIV(N);
1922 case ISD::FREM: return visitFREM(N);
1923 case ISD::FSQRT: return visitFSQRT(N);
1924 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1925 case ISD::FPOW: return visitFPOW(N);
1926 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1927 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1928 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1929 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1930 case ISD::LRINT:
1931 case ISD::LLRINT: return visitXRINT(N);
1932 case ISD::FP_ROUND: return visitFP_ROUND(N);
1933 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1934 case ISD::FNEG: return visitFNEG(N);
1935 case ISD::FABS: return visitFABS(N);
1936 case ISD::FFLOOR: return visitFFLOOR(N);
1937 case ISD::FMINNUM:
1938 case ISD::FMAXNUM:
1939 case ISD::FMINIMUM:
1940 case ISD::FMAXIMUM: return visitFMinMax(N);
1941 case ISD::FCEIL: return visitFCEIL(N);
1942 case ISD::FTRUNC: return visitFTRUNC(N);
1943 case ISD::FFREXP: return visitFFREXP(N);
1944 case ISD::BRCOND: return visitBRCOND(N);
1945 case ISD::BR_CC: return visitBR_CC(N);
1946 case ISD::LOAD: return visitLOAD(N);
1947 case ISD::STORE: return visitSTORE(N);
1948 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1949 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1950 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1951 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1952 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1953 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1954 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1955 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1956 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1957 case ISD::MGATHER: return visitMGATHER(N);
1958 case ISD::MLOAD: return visitMLOAD(N);
1959 case ISD::MSCATTER: return visitMSCATTER(N);
1960 case ISD::MSTORE: return visitMSTORE(N);
1961 case ISD::VECTOR_COMPRESS: return visitVECTOR_COMPRESS(N);
1962 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1963 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1964 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1965 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1966 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1967 case ISD::FREEZE: return visitFREEZE(N);
1968 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1969 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1970 case ISD::VECREDUCE_FADD:
1971 case ISD::VECREDUCE_FMUL:
1972 case ISD::VECREDUCE_ADD:
1973 case ISD::VECREDUCE_MUL:
1974 case ISD::VECREDUCE_AND:
1975 case ISD::VECREDUCE_OR:
1976 case ISD::VECREDUCE_XOR:
1977 case ISD::VECREDUCE_SMAX:
1978 case ISD::VECREDUCE_SMIN:
1979 case ISD::VECREDUCE_UMAX:
1980 case ISD::VECREDUCE_UMIN:
1981 case ISD::VECREDUCE_FMAX:
1982 case ISD::VECREDUCE_FMIN:
1983 case ISD::VECREDUCE_FMAXIMUM:
1984 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1985#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1986#include "llvm/IR/VPIntrinsics.def"
1987 return visitVPOp(N);
1988 }
1989 // clang-format on
1990 return SDValue();
1991}
1992
1993SDValue DAGCombiner::combine(SDNode *N) {
1994 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1995 return SDValue();
1996
1997 SDValue RV;
1998 if (!DisableGenericCombines)
1999 RV = visit(N);
2000
2001 // If nothing happened, try a target-specific DAG combine.
2002 if (!RV.getNode()) {
2003 assert(N->getOpcode() != ISD::DELETED_NODE &&
2004 "Node was deleted but visit returned NULL!");
2005
2006 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2007 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2008
2009 // Expose the DAG combiner to the target combiner impls.
2010 TargetLowering::DAGCombinerInfo
2011 DagCombineInfo(DAG, Level, false, this);
2012
2013 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2014 }
2015 }
2016
2017 // If nothing happened still, try promoting the operation.
2018 if (!RV.getNode()) {
2019 switch (N->getOpcode()) {
2020 default: break;
2021 case ISD::ADD:
2022 case ISD::SUB:
2023 case ISD::MUL:
2024 case ISD::AND:
2025 case ISD::OR:
2026 case ISD::XOR:
2027 RV = PromoteIntBinOp(SDValue(N, 0));
2028 break;
2029 case ISD::SHL:
2030 case ISD::SRA:
2031 case ISD::SRL:
2032 RV = PromoteIntShiftOp(SDValue(N, 0));
2033 break;
2034 case ISD::SIGN_EXTEND:
2035 case ISD::ZERO_EXTEND:
2036 case ISD::ANY_EXTEND:
2037 RV = PromoteExtend(SDValue(N, 0));
2038 break;
2039 case ISD::LOAD:
2040 if (PromoteLoad(SDValue(N, 0)))
2041 RV = SDValue(N, 0);
2042 break;
2043 }
2044 }
2045
2046 // If N is a commutative binary node, try to eliminate it if the commuted
2047 // version is already present in the DAG.
2048 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2049 SDValue N0 = N->getOperand(0);
2050 SDValue N1 = N->getOperand(1);
2051
2052 // Constant operands are canonicalized to RHS.
2053 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2054 SDValue Ops[] = {N1, N0};
2055 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2056 N->getFlags());
2057 if (CSENode)
2058 return SDValue(CSENode, 0);
2059 }
2060 }
2061
2062 return RV;
2063}
2064
2065/// Given a node, return its input chain if it has one, otherwise return a null
2066/// sd operand.
2067 static SDValue getInputChainForNode(SDNode *N) {
2068 if (unsigned NumOps = N->getNumOperands()) {
2069 if (N->getOperand(0).getValueType() == MVT::Other)
2070 return N->getOperand(0);
2071 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2072 return N->getOperand(NumOps-1);
2073 for (unsigned i = 1; i < NumOps-1; ++i)
2074 if (N->getOperand(i).getValueType() == MVT::Other)
2075 return N->getOperand(i);
2076 }
2077 return SDValue();
2078}
2079
2080SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2081 // If N has two operands, where one has an input chain equal to the other,
2082 // the 'other' chain is redundant.
2083 if (N->getNumOperands() == 2) {
2084 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2085 return N->getOperand(0);
2086 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2087 return N->getOperand(1);
2088 }
2089
2090 // Don't simplify token factors if optnone.
2091 if (OptLevel == CodeGenOptLevel::None)
2092 return SDValue();
2093
2094 // Don't simplify the token factor if the node itself has too many operands.
2095 if (N->getNumOperands() > TokenFactorInlineLimit)
2096 return SDValue();
2097
2098 // If the sole user is a token factor, we should make sure we have a
2099 // chance to merge them together. This prevents TF chains from inhibiting
2100 // optimizations.
2101 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2102 AddToWorklist(*(N->use_begin()));
2103
2104 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2105 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2106 SmallPtrSet<SDNode *, 16> SeenOps;
2107 bool Changed = false; // If we should replace this token factor.
2108
2109 // Start out with this token factor.
2110 TFs.push_back(N);
2111
2112 // Iterate through token factors. The TFs list grows when new token factors
2113 // are encountered.
2114 for (unsigned i = 0; i < TFs.size(); ++i) {
2115 // Limit number of nodes to inline, to avoid quadratic compile times.
2116 // We have to add the outstanding Token Factors to Ops, otherwise we might
2117 // drop Ops from the resulting Token Factors.
2118 if (Ops.size() > TokenFactorInlineLimit) {
2119 for (unsigned j = i; j < TFs.size(); j++)
2120 Ops.emplace_back(TFs[j], 0);
2121 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2122 // combiner worklist later.
2123 TFs.resize(i);
2124 break;
2125 }
2126
2127 SDNode *TF = TFs[i];
2128 // Check each of the operands.
2129 for (const SDValue &Op : TF->op_values()) {
2130 switch (Op.getOpcode()) {
2131 case ISD::EntryToken:
2132 // Entry tokens don't need to be added to the list. They are
2133 // redundant.
2134 Changed = true;
2135 break;
2136
2137 case ISD::TokenFactor:
2138 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2139 // Queue up for processing.
2140 TFs.push_back(Op.getNode());
2141 Changed = true;
2142 break;
2143 }
2144 [[fallthrough]];
2145
2146 default:
2147 // Only add if it isn't already in the list.
2148 if (SeenOps.insert(Op.getNode()).second)
2149 Ops.push_back(Op);
2150 else
2151 Changed = true;
2152 break;
2153 }
2154 }
2155 }
2156
2157 // Re-visit inlined Token Factors, to clean them up in case they have been
2158 // removed. Skip the first Token Factor, as this is the current node.
2159 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2160 AddToWorklist(TFs[i]);
2161
2162 // Remove Nodes that are chained to another node in the list. Do so
2163 // by walking up chains breadth-first, stopping when we've seen
2164 // another operand. In general we must climb to the EntryNode, but we can exit
2165 // early if we find all remaining work is associated with just one operand as
2166 // no further pruning is possible.
2167
2168 // List of nodes to search through and original Ops from which they originate.
2169 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2170 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2171 SmallPtrSet<SDNode *, 16> SeenChains;
2172 bool DidPruneOps = false;
2173
2174 unsigned NumLeftToConsider = 0;
2175 for (const SDValue &Op : Ops) {
2176 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2177 OpWorkCount.push_back(1);
2178 }
2179
2180 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2181 // If this is an Op, we can remove the op from the list. Re-mark any
2182 // search associated with it as from the current OpNumber.
2183 if (SeenOps.contains(Op)) {
2184 Changed = true;
2185 DidPruneOps = true;
2186 unsigned OrigOpNumber = 0;
2187 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2188 OrigOpNumber++;
2189 assert((OrigOpNumber != Ops.size()) &&
2190 "expected to find TokenFactor Operand");
2191 // Re-mark worklist from OrigOpNumber to OpNumber
2192 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2193 if (Worklist[i].second == OrigOpNumber) {
2194 Worklist[i].second = OpNumber;
2195 }
2196 }
2197 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2198 OpWorkCount[OrigOpNumber] = 0;
2199 NumLeftToConsider--;
2200 }
2201 // Add if it's a new chain
2202 if (SeenChains.insert(Op).second) {
2203 OpWorkCount[OpNumber]++;
2204 Worklist.push_back(std::make_pair(Op, OpNumber));
2205 }
2206 };
2207
2208 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2209 // We need to consider at least 2 Ops to prune.
2210 if (NumLeftToConsider <= 1)
2211 break;
2212 auto CurNode = Worklist[i].first;
2213 auto CurOpNumber = Worklist[i].second;
2214 assert((OpWorkCount[CurOpNumber] > 0) &&
2215 "Node should not appear in worklist");
2216 switch (CurNode->getOpcode()) {
2217 case ISD::EntryToken:
2218 // Hitting EntryToken is the only way for the search to terminate without
2219 // hitting
2220 // another operand's search. Prevent us from marking this operand
2221 // considered.
2222 NumLeftToConsider++;
2223 break;
2224 case ISD::TokenFactor:
2225 for (const SDValue &Op : CurNode->op_values())
2226 AddToWorklist(i, Op.getNode(), CurOpNumber);
2227 break;
2228 case ISD::LIFETIME_START:
2229 case ISD::LIFETIME_END:
2230 case ISD::CopyFromReg:
2231 case ISD::CopyToReg:
2232 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2233 break;
2234 default:
2235 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2236 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2237 break;
2238 }
2239 OpWorkCount[CurOpNumber]--;
2240 if (OpWorkCount[CurOpNumber] == 0)
2241 NumLeftToConsider--;
2242 }
2243
2244 // If we've changed things around then replace token factor.
2245 if (Changed) {
2246 SDValue Result;
2247 if (Ops.empty()) {
2248 // The entry token is the only possible outcome.
2249 Result = DAG.getEntryNode();
2250 } else {
2251 if (DidPruneOps) {
2252 SmallVector<SDValue, 8> PrunedOps;
2253 //
2254 for (const SDValue &Op : Ops) {
2255 if (SeenChains.count(Op.getNode()) == 0)
2256 PrunedOps.push_back(Op);
2257 }
2258 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2259 } else {
2260 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2261 }
2262 }
2263 return Result;
2264 }
2265 return SDValue();
2266}
2267
2268/// MERGE_VALUES can always be eliminated.
2269SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2270 WorklistRemover DeadNodes(*this);
2271 // Replacing results may cause a different MERGE_VALUES to suddenly
2272 // be CSE'd with N, and carry its uses with it. Iterate until no
2273 // uses remain, to ensure that the node can be safely deleted.
2274 // First add the users of this node to the work list so that they
2275 // can be tried again once they have new operands.
2276 AddUsersToWorklist(N);
2277 do {
2278 // Do as a single replacement to avoid rewalking use lists.
2279 SmallVector<SDValue, 8> Ops;
2280 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2281 Ops.push_back(N->getOperand(i));
2282 DAG.ReplaceAllUsesWith(N, Ops.data());
2283 } while (!N->use_empty());
2284 deleteAndRecombine(N);
2285 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2286}
2287
2288/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2289/// ConstantSDNode pointer else nullptr.
2290 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2291 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2292 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2293}
2294
2295 // isTruncateOf - If N is a truncate of some other value, return true and record
2296// the value being truncated in Op and which of Op's bits are zero/one in Known.
2297// This function computes KnownBits to avoid a duplicated call to
2298// computeKnownBits in the caller.
2299 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2300 KnownBits &Known) {
2301 if (N->getOpcode() == ISD::TRUNCATE) {
2302 Op = N->getOperand(0);
2303 Known = DAG.computeKnownBits(Op);
2304 return true;
2305 }
2306
2307 if (N.getValueType().getScalarType() != MVT::i1 ||
2308 !sd_match(
2309 N, m_c_SetCC(m_Value(Op), m_Zero(), m_SpecificCondCode(ISD::SETNE))))
2310 return false;
2311
2312 Known = DAG.computeKnownBits(Op);
2313 return (Known.Zero | 1).isAllOnes();
2314}
2315
2316/// Return true if 'Use' is a load or a store that uses N as its base pointer
2317/// and that N may be folded in the load / store addressing mode.
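// For illustration: if N is (add %base, 16) and Use is a load of that address,
// this asks whether a [reg + 16] addressing mode is legal for the loaded type
// and address space; with a non-constant RHS it instead queries a [reg + reg]
// form (Scale = 1), mirroring the AddrMode setup below.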
2318 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2319 const TargetLowering &TLI) {
2320 EVT VT;
2321 unsigned AS;
2322
2323 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2324 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2325 return false;
2326 VT = LD->getMemoryVT();
2327 AS = LD->getAddressSpace();
2328 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2329 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2330 return false;
2331 VT = ST->getMemoryVT();
2332 AS = ST->getAddressSpace();
2333 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2334 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2335 return false;
2336 VT = LD->getMemoryVT();
2337 AS = LD->getAddressSpace();
2338 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2339 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2340 return false;
2341 VT = ST->getMemoryVT();
2342 AS = ST->getAddressSpace();
2343 } else {
2344 return false;
2345 }
2346
2347 TargetLowering::AddrMode AM;
2348 if (N->getOpcode() == ISD::ADD) {
2349 AM.HasBaseReg = true;
2350 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2351 if (Offset)
2352 // [reg +/- imm]
2353 AM.BaseOffs = Offset->getSExtValue();
2354 else
2355 // [reg +/- reg]
2356 AM.Scale = 1;
2357 } else if (N->getOpcode() == ISD::SUB) {
2358 AM.HasBaseReg = true;
2359 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2360 if (Offset)
2361 // [reg +/- imm]
2362 AM.BaseOffs = -Offset->getSExtValue();
2363 else
2364 // [reg +/- reg]
2365 AM.Scale = 1;
2366 } else {
2367 return false;
2368 }
2369
2370 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2371 VT.getTypeForEVT(*DAG.getContext()), AS);
2372}
2373
2374/// This inverts a canonicalization in IR that replaces a variable select arm
2375/// with an identity constant. Codegen improves if we re-use the variable
2376/// operand rather than load a constant. This can also be converted into a
2377/// masked vector operation if the target supports it.
2378 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2379 bool ShouldCommuteOperands) {
2380 // Match a select as operand 1. The identity constant that we are looking for
2381 // is only valid as operand 1 of a non-commutative binop.
2382 SDValue N0 = N->getOperand(0);
2383 SDValue N1 = N->getOperand(1);
2384 if (ShouldCommuteOperands)
2385 std::swap(N0, N1);
2386
2387 // TODO: Should this apply to scalar select too?
2388 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2389 return SDValue();
2390
2391 // We can't hoist all instructions because of immediate UB (not speculatable).
2392 // For example div/rem by zero.
2394 return SDValue();
2395
2396 unsigned Opcode = N->getOpcode();
2397 EVT VT = N->getValueType(0);
2398 SDValue Cond = N1.getOperand(0);
2399 SDValue TVal = N1.getOperand(1);
2400 SDValue FVal = N1.getOperand(2);
2401
2402 // This transform increases uses of N0, so freeze it to be safe.
2403 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2404 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2405 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2406 SDValue F0 = DAG.getFreeze(N0);
2407 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2408 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2409 }
2410 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2411 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2412 SDValue F0 = DAG.getFreeze(N0);
2413 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2414 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2415 }
2416
2417 return SDValue();
2418}
2419
2420SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2421 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2422 "Unexpected binary operator");
2423
2424 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2425 auto BinOpcode = BO->getOpcode();
2426 EVT VT = BO->getValueType(0);
2427 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2428 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2429 return Sel;
2430
2431 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2432 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2433 return Sel;
2434 }
2435
2436 // Don't do this unless the old select is going away. We want to eliminate the
2437 // binary operator, not replace a binop with a select.
2438 // TODO: Handle ISD::SELECT_CC.
2439 unsigned SelOpNo = 0;
2440 SDValue Sel = BO->getOperand(0);
2441 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2442 SelOpNo = 1;
2443 Sel = BO->getOperand(1);
2444
2445 // Peek through trunc to shift amount type.
2446 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2447 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2448 // This is valid when the truncated bits of x are already zero.
2449 SDValue Op;
2450 KnownBits Known;
2451 if (isTruncateOf(DAG, Sel, Op, Known) &&
2453 Sel = Op;
2454 }
2455 }
2456
2457 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2458 return SDValue();
2459
2460 SDValue CT = Sel.getOperand(1);
2461 if (!isConstantOrConstantVector(CT, true) &&
2462 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2463 return SDValue();
2464
2465 SDValue CF = Sel.getOperand(2);
2466 if (!isConstantOrConstantVector(CF, true) &&
2467 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2468 return SDValue();
2469
2470 // Bail out if any constants are opaque because we can't constant fold those.
2471 // The exception is "and" and "or" with either 0 or -1 in which case we can
2472 // propagate non constant operands into select. I.e.:
2473 // and (select Cond, 0, -1), X --> select Cond, 0, X
2474 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2475 bool CanFoldNonConst =
2476 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2477 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2478 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2479
2480 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2481 if (!CanFoldNonConst &&
2482 !isConstantOrConstantVector(CBO, true) &&
2483 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2484 return SDValue();
2485
2486 SDLoc DL(Sel);
2487 SDValue NewCT, NewCF;
2488
2489 if (CanFoldNonConst) {
2490 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2491 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2492 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2493 NewCT = CT;
2494 else
2495 NewCT = CBO;
2496
2497 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2498 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2499 NewCF = CF;
2500 else
2501 NewCF = CBO;
2502 } else {
2503 // We have a select-of-constants followed by a binary operator with a
2504 // constant. Eliminate the binop by pulling the constant math into the
2505 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2506 // CBO, CF + CBO
2507 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2508 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2509 if (!NewCT)
2510 return SDValue();
2511
2512 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2513 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2514 if (!NewCF)
2515 return SDValue();
2516 }
2517
2518 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2519 SelectOp->setFlags(BO->getFlags());
2520 return SelectOp;
2521}
2522
2523 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2524 SelectionDAG &DAG) {
2525 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2526 "Expecting add or sub");
2527
2528 // Match a constant operand and a zext operand for the math instruction:
2529 // add Z, C
2530 // sub C, Z
2531 bool IsAdd = N->getOpcode() == ISD::ADD;
2532 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2533 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2534 auto *CN = dyn_cast<ConstantSDNode>(C);
2535 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2536 return SDValue();
2537
2538 // Match the zext operand as a setcc of a boolean.
2539 if (Z.getOperand(0).getValueType() != MVT::i1)
2540 return SDValue();
2541
2542 // Match the compare as: setcc (X & 1), 0, eq.
2543 if (!sd_match(Z.getOperand(0), m_SetCC(m_And(m_Value(), m_One()), m_Zero(),
2544 m_SpecificCondCode(ISD::SETEQ))))
2545 return SDValue();
2546
2547 // We are adding/subtracting a constant and an inverted low bit. Turn that
2548 // into a subtract/add of the low bit with incremented/decremented constant:
2549 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2550 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2551 EVT VT = C.getValueType();
2552 SDValue LowBit = DAG.getZExtOrTrunc(Z.getOperand(0).getOperand(0), DL, VT);
2553 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT)
2554 : DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2555 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2556}
2557
2558// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
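// Sketch of why this is a rounded-up average: A + B = (A | B) + (A & B) and
// A ^ B = (A | B) - (A & B), so (A | B) - ((A ^ B) >> 1) equals
// ceil((A + B) / 2) without computing the potentially overflowing A + B.
// The signed form uses an arithmetic shift of (A ^ B).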
2559SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2560 SDValue N0 = N->getOperand(0);
2561 EVT VT = N0.getValueType();
2562 SDValue A, B;
2563
2564 if ((!LegalOperations || hasOperation(ISD::AVGCEILU, VT)) &&
2565 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2566 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2567 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2568 }
2569 if ((!LegalOperations || hasOperation(ISD::AVGCEILS, VT)) &&
2570 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2571 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2572 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2573 }
2574 return SDValue();
2575}
2576
2577/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2578/// a shift and add with a different constant.
2579 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2580 SelectionDAG &DAG) {
2581 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2582 "Expecting add or sub");
2583
2584 // We need a constant operand for the add/sub, and the other operand is a
2585 // logical shift right: add (srl), C or sub C, (srl).
2586 bool IsAdd = N->getOpcode() == ISD::ADD;
2587 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2588 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2589 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2590 ShiftOp.getOpcode() != ISD::SRL)
2591 return SDValue();
2592
2593 // The shift must be of a 'not' value.
2594 SDValue Not = ShiftOp.getOperand(0);
2595 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2596 return SDValue();
2597
2598 // The shift must be moving the sign bit to the least-significant-bit.
2599 EVT VT = ShiftOp.getValueType();
2600 SDValue ShAmt = ShiftOp.getOperand(1);
2601 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2602 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2603 return SDValue();
2604
2605 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2606 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2607 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2608 if (SDValue NewC = DAG.FoldConstantArithmetic(
2609 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2610 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2611 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2612 Not.getOperand(0), ShAmt);
2613 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2614 }
2615
2616 return SDValue();
2617}
2618
2619static bool
2620 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2621 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2622 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2623}
2624
2625/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2626/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2627/// are no common bits set in the operands).
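// For illustration: (or x, 1) with bit 0 of x known to be zero computes the
// same value as (add x, 1); that equivalence is what lets these folds run on
// such OR nodes too, and is what DAG.isADDLike checks for operands below.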
2628SDValue DAGCombiner::visitADDLike(SDNode *N) {
2629 SDValue N0 = N->getOperand(0);
2630 SDValue N1 = N->getOperand(1);
2631 EVT VT = N0.getValueType();
2632 SDLoc DL(N);
2633
2634 // fold (add x, undef) -> undef
2635 if (N0.isUndef())
2636 return N0;
2637 if (N1.isUndef())
2638 return N1;
2639
2640 // fold (add c1, c2) -> c1+c2
2641 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2642 return C;
2643
2644 // canonicalize constant to RHS
2645 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2646 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2647 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2648
2649 if (areBitwiseNotOfEachother(N0, N1))
2650 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT);
2651
2652 // fold vector ops
2653 if (VT.isVector()) {
2654 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2655 return FoldedVOp;
2656
2657 // fold (add x, 0) -> x, vector edition
2658 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2659 return N0;
2660 }
2661
2662 // fold (add x, 0) -> x
2663 if (isNullConstant(N1))
2664 return N0;
2665
2666 if (N0.getOpcode() == ISD::SUB) {
2667 SDValue N00 = N0.getOperand(0);
2668 SDValue N01 = N0.getOperand(1);
2669
2670 // fold ((A-c1)+c2) -> (A+(c2-c1))
2671 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2672 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2673
2674 // fold ((c1-A)+c2) -> (c1+c2)-A
2675 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2676 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2677 }
2678
2679 // add (sext i1 X), 1 -> zext (not i1 X)
2680 // We don't transform this pattern:
2681 // add (zext i1 X), -1 -> sext (not i1 X)
2682 // because most (?) targets generate better code for the zext form.
2683 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2684 isOneOrOneSplat(N1)) {
2685 SDValue X = N0.getOperand(0);
2686 if ((!LegalOperations ||
2687 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2688 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2689 X.getScalarValueSizeInBits() == 1) {
2690 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2691 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2692 }
2693 }
2694
2695 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2696 // iff (or x, c0) is equivalent to (add x, c0).
2697 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2698 // iff (xor x, c0) is equivalent to (add x, c0).
2699 if (DAG.isADDLike(N0)) {
2700 SDValue N01 = N0.getOperand(1);
2701 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2702 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2703 }
2704
2705 if (SDValue NewSel = foldBinOpIntoSelect(N))
2706 return NewSel;
2707
2708 // reassociate add
2709 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2710 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2711 return RADD;
2712
2713 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2714 // equivalent to (add x, c).
2715 // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
2716 // equivalent to (add x, c).
2717 // Do this optimization only when adding c does not introduce instructions
2718 // for adding carries.
2719 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2720 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2721 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2722 // If N0's type does not split or is a sign mask, it does not introduce
2723 // add carry.
2724 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2725 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2726 TyActn == TargetLoweringBase::TypePromoteInteger ||
2727 isMinSignedConstant(N0.getOperand(1));
2728 if (NoAddCarry)
2729 return DAG.getNode(
2730 ISD::ADD, DL, VT,
2731 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2732 N0.getOperand(1));
2733 }
2734 return SDValue();
2735 };
2736 if (SDValue Add = ReassociateAddOr(N0, N1))
2737 return Add;
2738 if (SDValue Add = ReassociateAddOr(N1, N0))
2739 return Add;
2740
2741 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2742 if (SDValue SD =
2743 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2744 return SD;
2745 }
2746
2747 SDValue A, B, C, D;
2748
2749 // fold ((0-A) + B) -> B-A
2750 if (sd_match(N0, m_Neg(m_Value(A))))
2751 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2752
2753 // fold (A + (0-B)) -> A-B
2754 if (sd_match(N1, m_Neg(m_Value(B))))
2755 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2756
2757 // fold (A+(B-A)) -> B
2758 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2759 return B;
2760
2761 // fold ((B-A)+A) -> B
2762 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2763 return B;
2764
2765 // fold ((A-B)+(C-A)) -> (C-B)
2766 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2767 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2768 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2769
2770 // fold ((A-B)+(B-C)) -> (A-C)
2771 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2772 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2773 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2774
2775 // fold (A+(B-(A+C))) to (B-C)
2776 // fold (A+(B-(C+A))) to (B-C)
2777 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2778 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2779
2780 // fold (A+((B-A)+or-C)) to (B+or-C)
2781 if (sd_match(N1,
2782 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2783 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2784 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2785
2786 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2787 if (sd_match(N0, m_OneUse(m_Sub(m_Value(A), m_Value(B)))) &&
2788 sd_match(N1, m_OneUse(m_Sub(m_Value(C), m_Value(D)))) &&
2789 (isConstantOrConstantVector(A) || isConstantOrConstantVector(C)))
2790 return DAG.getNode(ISD::SUB, DL, VT,
2791 DAG.getNode(ISD::ADD, SDLoc(N0), VT, A, C),
2792 DAG.getNode(ISD::ADD, SDLoc(N1), VT, B, D));
2793
2794 // fold (add (umax X, C), -C) --> (usubsat X, C)
2795 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2796 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2797 return (!Max && !Op) ||
2798 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2799 };
2800 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2801 /*AllowUndefs*/ true))
2802 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2803 N0.getOperand(1));
2804 }
2805
2806 if (SimplifyDemandedBits(SDValue(N, 0)))
2807 return SDValue(N, 0);
2808
2809 if (isOneOrOneSplat(N1)) {
2810 // fold (add (xor a, -1), 1) -> (sub 0, a)
2811 if (isBitwiseNot(N0))
2812 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2813 N0.getOperand(0));
2814
2815 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2816 if (N0.getOpcode() == ISD::ADD) {
2817 SDValue A, Xor;
2818
2819 if (isBitwiseNot(N0.getOperand(0))) {
2820 A = N0.getOperand(1);
2821 Xor = N0.getOperand(0);
2822 } else if (isBitwiseNot(N0.getOperand(1))) {
2823 A = N0.getOperand(0);
2824 Xor = N0.getOperand(1);
2825 }
2826
2827 if (Xor)
2828 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2829 }
2830
2831 // Look for:
2832 // add (add x, y), 1
2833 // And if the target does not like this form then turn into:
2834 // sub y, (xor x, -1)
2835 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2836 N0.hasOneUse() &&
2837 // Limit this to after legalization if the add has wrap flags
2838 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2839 !N->getFlags().hasNoSignedWrap()))) {
2840 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2841 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2842 }
2843 }
2844
2845 // (x - y) + -1 -> add (xor y, -1), x
2846 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2847 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2848 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2849 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2850 }
2851
2852 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2853 // This can help if the inner add has multiple uses.
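// Worked example (illustrative constants): with CA = 3, CM = 5, CB = 7,
//   add (mul (add x, 3), 5), 7 --> add (mul x, 5), 22
// since 3 * 5 + 7 == 22, leaving the original (add x, 3) intact for its
// other uses.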
2854 APInt CM, CA;
2855 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2856 if (VT.getScalarSizeInBits() <= 64) {
2857 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2858 m_ConstInt(CM)))) &&
2859 TLI.isLegalAddImmediate(
2860 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2861 SDNodeFlags Flags;
2862 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2863 // are _also_ nsw, the outputs can be too.
2864 if (N->getFlags().hasNoUnsignedWrap() &&
2865 N0->getFlags().hasNoUnsignedWrap() &&
2866 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2867 Flags.setNoUnsignedWrap(true);
2868 if (N->getFlags().hasNoSignedWrap() &&
2869 N0->getFlags().hasNoSignedWrap() &&
2870 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2871 Flags.setNoSignedWrap(true);
2872 }
2873 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2874 DAG.getConstant(CM, DL, VT), Flags);
2875 return DAG.getNode(
2876 ISD::ADD, DL, VT, Mul,
2877 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2878 }
2879 // Also look in case there is an intermediate add.
2880 if (sd_match(N0, m_OneUse(m_Add(
2881 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2882 m_ConstInt(CM))),
2883 m_Value(B)))) &&
2884 TLI.isLegalAddImmediate(
2885 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2886 SDNodeFlags Flags;
2887 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2888 // are _also_ nsw, the outputs can be too.
2889 SDValue OMul =
2890 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2891 if (N->getFlags().hasNoUnsignedWrap() &&
2892 N0->getFlags().hasNoUnsignedWrap() &&
2893 OMul->getFlags().hasNoUnsignedWrap() &&
2894 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2895 Flags.setNoUnsignedWrap(true);
2896 if (N->getFlags().hasNoSignedWrap() &&
2897 N0->getFlags().hasNoSignedWrap() &&
2898 OMul->getFlags().hasNoSignedWrap() &&
2899 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2900 Flags.setNoSignedWrap(true);
2901 }
2902 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2903 DAG.getConstant(CM, DL, VT), Flags);
2904 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2905 return DAG.getNode(
2906 ISD::ADD, DL, VT, Add,
2907 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2908 }
2909 }
2910 }
2911
2912 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2913 return Combined;
2914
2915 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2916 return Combined;
2917
2918 return SDValue();
2919}
2920
2921// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
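// Sketch of why this is a rounded-down average: A + B = 2 * (A & B) + (A ^ B),
// so (A & B) + ((A ^ B) >> 1) equals floor((A + B) / 2) without computing the
// potentially overflowing A + B; the signed form shifts (A ^ B) arithmetically.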
2922SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2923 SDValue N0 = N->getOperand(0);
2924 EVT VT = N0.getValueType();
2925 SDValue A, B;
2926
2927 if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
2928 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2929 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2930 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2931 }
2932 if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
2933 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2934 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
2935 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2936 }
2937
2938 return SDValue();
2939}
2940
2941SDValue DAGCombiner::visitADD(SDNode *N) {
2942 SDValue N0 = N->getOperand(0);
2943 SDValue N1 = N->getOperand(1);
2944 EVT VT = N0.getValueType();
2945 SDLoc DL(N);
2946
2947 if (SDValue Combined = visitADDLike(N))
2948 return Combined;
2949
2950 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2951 return V;
2952
2953 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2954 return V;
2955
2956 // Try to match AVGFLOOR fixedwidth pattern
2957 if (SDValue V = foldAddToAvg(N, DL))
2958 return V;
2959
2960 // fold (a+b) -> (a|b) iff a and b share no bits.
2961 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2962 DAG.haveNoCommonBitsSet(N0, N1)) {
2963 SDNodeFlags Flags;
2964 Flags.setDisjoint(true);
2965 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2966 }
2967
2968 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2969 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2970 const APInt &C0 = N0->getConstantOperandAPInt(0);
2971 const APInt &C1 = N1->getConstantOperandAPInt(0);
2972 return DAG.getVScale(DL, VT, C0 + C1);
2973 }
2974
2975 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2976 if (N0.getOpcode() == ISD::ADD &&
2977 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2978 N1.getOpcode() == ISD::VSCALE) {
2979 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2980 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2981 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2982 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2983 }
2984
2985 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
2986 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2987 N1.getOpcode() == ISD::STEP_VECTOR) {
2988 const APInt &C0 = N0->getConstantOperandAPInt(0);
2989 const APInt &C1 = N1->getConstantOperandAPInt(0);
2990 APInt NewStep = C0 + C1;
2991 return DAG.getStepVector(DL, VT, NewStep);
2992 }
2993
2994 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2995 if (N0.getOpcode() == ISD::ADD &&
2996 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2997 N1.getOpcode() == ISD::STEP_VECTOR) {
2998 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2999 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3000 APInt NewStep = SV0 + SV1;
3001 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3002 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3003 }
3004
3005 return SDValue();
3006}
3007
3008SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3009 unsigned Opcode = N->getOpcode();
3010 SDValue N0 = N->getOperand(0);
3011 SDValue N1 = N->getOperand(1);
3012 EVT VT = N0.getValueType();
3013 bool IsSigned = Opcode == ISD::SADDSAT;
3014 SDLoc DL(N);
3015
3016 // fold (add_sat x, undef) -> -1
3017 if (N0.isUndef() || N1.isUndef())
3018 return DAG.getAllOnesConstant(DL, VT);
3019
3020 // fold (add_sat c1, c2) -> c3
3021 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3022 return C;
3023
3024 // canonicalize constant to RHS
3025 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3026 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3027 return DAG.getNode(Opcode, DL, VT, N1, N0);
3028
3029 // fold vector ops
3030 if (VT.isVector()) {
3031 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3032 return FoldedVOp;
3033
3034 // fold (add_sat x, 0) -> x, vector edition
3035 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3036 return N0;
3037 }
3038
3039 // fold (add_sat x, 0) -> x
3040 if (isNullConstant(N1))
3041 return N0;
3042
3043 // If it cannot overflow, transform into an add.
3044 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3045 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3046
3047 return SDValue();
3048}
3049
3050 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3051 bool ForceCarryReconstruction = false) {
3052 bool Masked = false;
3053
3054 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3055 while (true) {
3056 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3057 V = V.getOperand(0);
3058 continue;
3059 }
3060
3061 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3062 if (ForceCarryReconstruction)
3063 return V;
3064
3065 Masked = true;
3066 V = V.getOperand(0);
3067 continue;
3068 }
3069
3070 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3071 return V;
3072
3073 break;
3074 }
3075
3076 // If this is not a carry, return.
3077 if (V.getResNo() != 1)
3078 return SDValue();
3079
3080 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3081 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3082 return SDValue();
3083
3084 EVT VT = V->getValueType(0);
3085 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3086 return SDValue();
3087
3088 // If the result is masked, then no matter what kind of bool it is we can
3089 // return. If it isn't, then we need to make sure the bool type is either 0 or
3090 // 1 and not other values.
3091 if (Masked ||
3092 TLI.getBooleanContents(V.getValueType()) ==
3093 TargetLowering::ZeroOrOneBooleanContent)
3094 return V;
3095
3096 return SDValue();
3097}
3098
3099/// Given the operands of an add/sub operation, see if the 2nd operand is a
3100/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3101/// the opcode and bypass the mask operation.
3102static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3103 SelectionDAG &DAG, const SDLoc &DL) {
3104 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3105 N1 = N1.getOperand(0);
3106
3107 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3108 return SDValue();
3109
3110 EVT VT = N0.getValueType();
3111 SDValue N10 = N1.getOperand(0);
3112 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3113 N10 = N10.getOperand(0);
3114
3115 if (N10.getValueType() != VT)
3116 return SDValue();
3117
3118 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3119 return SDValue();
3120
3121 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3122 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3123 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3124}
3125
3126/// Helper for doing combines based on N0 and N1 being added to each other.
3127SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3128 SDNode *LocReference) {
3129 EVT VT = N0.getValueType();
3130 SDLoc DL(LocReference);
3131
3132 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3133 SDValue Y, N;
3134 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3135 return DAG.getNode(ISD::SUB, DL, VT, N0,
3136 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3137
3138 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3139 return V;
3140
3141 // Look for:
3142 // add (add x, 1), y
3143 // And if the target does not like this form then turn into:
3144 // sub y, (xor x, -1)
3145 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3146 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3147 // Limit this to after legalization if the add has wrap flags
3148 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3149 !N0->getFlags().hasNoSignedWrap()))) {
3150 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3151 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3152 }
3153
3154 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3155 // Hoist one-use subtraction by non-opaque constant:
3156 // (x - C) + y -> (x + y) - C
3157 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3158 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3159 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3160 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3161 }
3162 // Hoist one-use subtraction from non-opaque constant:
3163 // (C - x) + y -> (y - x) + C
3164 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3165 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3166 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3167 }
3168 }
3169
3170 // add (mul x, C), x -> mul x, C+1
3171 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3172 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3173 N0.hasOneUse()) {
3174 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3175 DAG.getConstant(1, DL, VT));
3176 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3177 }
3178
3179 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3180 // rather than 'add 0/-1' (the zext should get folded).
3181 // add (sext i1 Y), X --> sub X, (zext i1 Y)
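 // ((sext i1 Y) is 0 or -1 while (zext i1 Y) is 0 or 1, so adding the former
 // and subtracting the latter give the same value.)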
3182 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3183 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3184 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3185 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3186 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3187 }
3188
3189 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3190 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3191 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3192 if (TN->getVT() == MVT::i1) {
3193 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3194 DAG.getConstant(1, DL, VT));
3195 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3196 }
3197 }
3198
3199 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3200 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3201 N1.getResNo() == 0)
3202 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3203 N0, N1.getOperand(0), N1.getOperand(2));
3204
3205 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3206 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3207 if (SDValue Carry = getAsCarry(TLI, N1))
3208 return DAG.getNode(ISD::UADDO_CARRY, DL,
3209 DAG.getVTList(VT, Carry.getValueType()), N0,
3210 DAG.getConstant(0, DL, VT), Carry);
3211
3212 return SDValue();
3213}
3214
3215SDValue DAGCombiner::visitADDC(SDNode *N) {
3216 SDValue N0 = N->getOperand(0);
3217 SDValue N1 = N->getOperand(1);
3218 EVT VT = N0.getValueType();
3219 SDLoc DL(N);
3220
3221 // If the flag result is dead, turn this into an ADD.
3222 if (!N->hasAnyUseOfValue(1))
3223 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3224 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3225
3226 // canonicalize constant to RHS.
3227 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3228 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3229 if (N0C && !N1C)
3230 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3231
3232 // fold (addc x, 0) -> x + no carry out
3233 if (isNullConstant(N1))
3234 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3235 DL, MVT::Glue));
3236
3237 // If it cannot overflow, transform into an add.
3238 if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3239 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3240 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3241
3242 return SDValue();
3243}
3244
3245/**
3246 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3247 * then the flip also occurs if computing the inverse is the same cost.
3248 * This function returns an empty SDValue in case it cannot flip the boolean
3249 * without increasing the cost of the computation. If you want to flip a boolean
3250 * no matter what, use DAG.getLogicalNOT.
3251 */
3252static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3253 const TargetLowering &TLI,
3254 bool Force) {
3255 if (Force && isa<ConstantSDNode>(V))
3256 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3257
3258 if (V.getOpcode() != ISD::XOR)
3259 return SDValue();
3260
3261 if (DAG.isBoolConstant(V.getOperand(1)) == true)
3262 return V.getOperand(0);
3263 if (Force && isConstOrConstSplat(V.getOperand(1), false))
3264 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3265 return SDValue();
3266}
3267
3268SDValue DAGCombiner::visitADDO(SDNode *N) {
3269 SDValue N0 = N->getOperand(0);
3270 SDValue N1 = N->getOperand(1);
3271 EVT VT = N0.getValueType();
3272 bool IsSigned = (ISD::SADDO == N->getOpcode());
3273
3274 EVT CarryVT = N->getValueType(1);
3275 SDLoc DL(N);
3276
3277 // If the flag result is dead, turn this into an ADD.
3278 if (!N->hasAnyUseOfValue(1))
3279 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3280 DAG.getUNDEF(CarryVT));
3281
3282 // canonicalize constant to RHS.
3283 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3284 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3285 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3286
3287 // fold (addo x, 0) -> x + no carry out
3288 if (isNullOrNullSplat(N1))
3289 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3290
3291 // If it cannot overflow, transform into an add.
3292 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3293 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3294 DAG.getConstant(0, DL, CarryVT));
3295
3296 if (IsSigned) {
3297 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
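 // ((xor a, -1) + 1 == -a in two's complement, and both forms overflow exactly
 // when a is the minimum signed value.)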
3298 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3299 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3300 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3301 } else {
3302 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
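 // (Unsigned: (xor a, -1) + 1 carries exactly when a == 0, while 0 - a borrows
 // exactly when a != 0, so the carry out is the logical inverse of the borrow.)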
3303 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3304 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3305 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3306 return CombineTo(
3307 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3308 }
3309
3310 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3311 return Combined;
3312
3313 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3314 return Combined;
3315 }
3316
3317 return SDValue();
3318}
3319
3320SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3321 EVT VT = N0.getValueType();
3322 if (VT.isVector())
3323 return SDValue();
3324
3325 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3326 // If Y + 1 cannot overflow.
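 // (The inner node produces Y or Y + 1; requiring that Y + 1 cannot wrap
 // guarantees the inner addition never generates a carry of its own, so the
 // merged uaddo_carry reports the same overflow as the original pair.)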
3327 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3328 SDValue Y = N1.getOperand(0);
3329 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3330 if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3331 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3332 N1.getOperand(2));
3333 }
3334
3335 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3336 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3337 if (SDValue Carry = getAsCarry(TLI, N1))
3338 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3339 DAG.getConstant(0, SDLoc(N), VT), Carry);
3340
3341 return SDValue();
3342}
3343
3344SDValue DAGCombiner::visitADDE(SDNode *N) {
3345 SDValue N0 = N->getOperand(0);
3346 SDValue N1 = N->getOperand(1);
3347 SDValue CarryIn = N->getOperand(2);
3348
3349 // canonicalize constant to RHS
3350 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3351 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3352 if (N0C && !N1C)
3353 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3354 N1, N0, CarryIn);
3355
3356 // fold (adde x, y, false) -> (addc x, y)
3357 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3358 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3359
3360 return SDValue();
3361}
3362
3363SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3364 SDValue N0 = N->getOperand(0);
3365 SDValue N1 = N->getOperand(1);
3366 SDValue CarryIn = N->getOperand(2);
3367 SDLoc DL(N);
3368
3369 // canonicalize constant to RHS
3370 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3371 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3372 if (N0C && !N1C)
3373 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3374
3375 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3376 if (isNullConstant(CarryIn)) {
3377 if (!LegalOperations ||
3378 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3379 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3380 }
3381
3382 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
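 // (0 + 0 + CarryIn is just the carry bit itself, so the sum is CarryIn
 // normalized to 0/1 in VT and the carry out is always 0.)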
3383 if (isNullConstant(N0) && isNullConstant(N1)) {
3384 EVT VT = N0.getValueType();
3385 EVT CarryVT = CarryIn.getValueType();
3386 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3387 AddToWorklist(CarryExt.getNode());
3388 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3389 DAG.getConstant(1, DL, VT)),
3390 DAG.getConstant(0, DL, CarryVT));
3391 }
3392
3393 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3394 return Combined;
3395
3396 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3397 return Combined;
3398
3399 // We want to avoid useless duplication.
3400 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3401 // not a binary operation, it is not really possible to leverage this
3402 // existing mechanism for it. However, if more operations require the same
3403 // deduplication logic, then it may be worth generalizing.
3404 SDValue Ops[] = {N1, N0, CarryIn};
3405 SDNode *CSENode =
3406 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3407 if (CSENode)
3408 return SDValue(CSENode, 0);
3409
3410 return SDValue();
3411}
3412
3413/**
3414 * If we are facing some sort of diamond carry propagation pattern try to
3415 * break it up to generate something like:
3416 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3417 *
3418 * The end result is usually an increase in the number of operations required, but because the
3419 * carry is now linearized, other transforms can kick in and optimize the DAG.
3420 *
3421 * Patterns typically look something like
3422 * (uaddo A, B)
3423 * / \
3424 * Carry Sum
3425 * | \
3426 * | (uaddo_carry *, 0, Z)
3427 * | /
3428 * \ Carry
3429 * | /
3430 * (uaddo_carry X, *, *)
3431 *
3432 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3433 * produce a combine with a single path for carry propagation.
3434 */
3435static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3436 SelectionDAG &DAG, SDValue X,
3437 SDValue Carry0, SDValue Carry1,
3438 SDNode *N) {
3439 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3440 return SDValue();
3441 if (Carry1.getOpcode() != ISD::UADDO)
3442 return SDValue();
3443
3444 SDValue Z;
3445
3446 /**
3447 * First look for a suitable Z. It will present itself in the form of
3448 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3449 */
3450 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3451 isNullConstant(Carry0.getOperand(1))) {
3452 Z = Carry0.getOperand(2);
3453 } else if (Carry0.getOpcode() == ISD::UADDO &&
3454 isOneConstant(Carry0.getOperand(1))) {
3455 EVT VT = Carry0->getValueType(1);
3456 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3457 } else {
3458 // We couldn't find a suitable Z.
3459 return SDValue();
3460 }
3461
3462
3463 auto cancelDiamond = [&](SDValue A,SDValue B) {
3464 SDLoc DL(N);
3465 SDValue NewY =
3466 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3467 Combiner.AddToWorklist(NewY.getNode());
3468 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3469 DAG.getConstant(0, DL, X.getValueType()),
3470 NewY.getValue(1));
3471 };
3472
3473 /**
3474 * (uaddo A, B)
3475 * |
3476 * Sum
3477 * |
3478 * (uaddo_carry *, 0, Z)
3479 */
3480 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3481 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3482 }
3483
3484 /**
3485 * (uaddo_carry A, 0, Z)
3486 * |
3487 * Sum
3488 * |
3489 * (uaddo *, B)
3490 */
3491 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3492 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3493 }
3494
3495 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3496 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3497 }
3498
3499 return SDValue();
3500}
3501
3502// If we are facing some sort of diamond carry/borrow in/out pattern try to
3503// match patterns like:
3504//
3505// (uaddo A, B) CarryIn
3506// | \ |
3507// | \ |
3508// PartialSum PartialCarryOutX /
3509// | | /
3510// | ____|____________/
3511// | / |
3512// (uaddo *, *) \________
3513// | \ \
3514// | \ |
3515// | PartialCarryOutY |
3516// | \ |
3517// | \ /
3518// AddCarrySum | ______/
3519// | /
3520// CarryOut = (or *, *)
3521//
3522// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3523//
3524// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3525//
3526// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3527// with a single path for carry/borrow out propagation.
3528static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3529 SDValue N0, SDValue N1, SDNode *N) {
3530 SDValue Carry0 = getAsCarry(TLI, N0);
3531 if (!Carry0)
3532 return SDValue();
3533 SDValue Carry1 = getAsCarry(TLI, N1);
3534 if (!Carry1)
3535 return SDValue();
3536
3537 unsigned Opcode = Carry0.getOpcode();
3538 if (Opcode != Carry1.getOpcode())
3539 return SDValue();
3540 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3541 return SDValue();
3542 // Guarantee identical type of CarryOut
3543 EVT CarryOutType = N->getValueType(0);
3544 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3545 CarryOutType != Carry1.getValue(1).getValueType())
3546 return SDValue();
3547
3548 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3549 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3550 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3551 std::swap(Carry0, Carry1);
3552
3553 // Check if nodes are connected in the expected way.
3554 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3555 Carry1.getOperand(1) != Carry0.getValue(0))
3556 return SDValue();
3557
3558 // The carry-in value must be on the right-hand side for subtraction.
3559 unsigned CarryInOperandNum =
3560 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3561 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3562 return SDValue();
3563 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3564
3565 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3566 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3567 return SDValue();
3568
3569 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3570 CarryIn = getAsCarry(TLI, CarryIn, true);
3571 if (!CarryIn)
3572 return SDValue();
3573
3574 SDLoc DL(N);
3575 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3576 Carry1->getValueType(0));
3577 SDValue Merged =
3578 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3579 Carry0.getOperand(1), CarryIn);
3580
3581 // Please note that because the result of the UADDO/USUBO of A and B feeds
3582 // into the UADDO/USUBO that consumes the carry/borrow in, we can prove that
3583 // if the first UADDO/USUBO overflows, the second
3584 // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3585 // maximum value.
3586 //
3587 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3588 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3589 //
3590 // This is important because it means that OR and XOR can be used to merge
3591 // carry flags; and that AND can return a constant zero.
3592 //
3593 // TODO: match other operations that can merge flags (ADD, etc)
3594 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3595 if (N->getOpcode() == ISD::AND)
3596 return DAG.getConstant(0, DL, CarryOutType);
3597 return Merged.getValue(1);
3598}
3599
3600SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3601 SDValue CarryIn, SDNode *N) {
3602 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3603 // carry.
3604 if (isBitwiseNot(N0))
3605 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3606 SDLoc DL(N);
3607 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3608 N0.getOperand(0), NotC);
3609 return CombineTo(
3610 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3611 }
3612
3613 // Iff the flag result is dead:
3614 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3615 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3616 // or the dependency between the instructions.
3617 if ((N0.getOpcode() == ISD::ADD ||
3618 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3619 N0.getValue(1) != CarryIn)) &&
3620 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3621 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3622 N0.getOperand(0), N0.getOperand(1), CarryIn);
3623
3624 /**
3625 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3626 * a diamond carry propagation, in which case we try to transform the DAG
3627 * to ensure linear carry propagation if that is possible.
3628 */
3629 if (auto Y = getAsCarry(TLI, N1)) {
3630 // Because both are carries, Y and Z can be swapped.
3631 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3632 return R;
3633 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3634 return R;
3635 }
3636
3637 return SDValue();
3638}
3639
3640SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3641 SDValue CarryIn, SDNode *N) {
3642 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3643 if (isBitwiseNot(N0)) {
3644 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3645 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3646 N0.getOperand(0), NotC);
3647 }
3648
3649 return SDValue();
3650}
3651
3652SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3653 SDValue N0 = N->getOperand(0);
3654 SDValue N1 = N->getOperand(1);
3655 SDValue CarryIn = N->getOperand(2);
3656 SDLoc DL(N);
3657
3658 // canonicalize constant to RHS
3659 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3660 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3661 if (N0C && !N1C)
3662 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3663
3664 // fold (saddo_carry x, y, false) -> (saddo x, y)
3665 if (isNullConstant(CarryIn)) {
3666 if (!LegalOperations ||
3667 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3668 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3669 }
3670
3671 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3672 return Combined;
3673
3674 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3675 return Combined;
3676
3677 return SDValue();
3678}
3679
3680// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3681// clamp/truncation if necessary.
3682static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3683 SDValue RHS, SelectionDAG &DAG,
3684 const SDLoc &DL) {
3685 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3686 "Illegal truncation");
3687
3688 if (DstVT == SrcVT)
3689 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3690
3691 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3692 // clamping RHS.
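 // For example (illustrative, i32 -> i16): if LHS is known to be < 65536, the
 // i32 usubsat result always fits in i16. Clamping RHS to 65535 with UMIN before
 // truncating both operands preserves the value: when RHS >= LHS the clamped RHS
 // is still >= LHS (so the result stays 0), and when RHS < LHS the clamp is a
 // no-op.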
3693 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3694 DstVT.getScalarSizeInBits());
3695 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3696 return SDValue();
3697
3698 SDValue SatLimit =
3699 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3700 DstVT.getScalarSizeInBits()),
3701 DL, SrcVT);
3702 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3703 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3704 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3705 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3706}
3707
3708// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3709// usubsat(a,b), optionally as a truncated type.
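// (Both forms compute usubsat exactly: for unsigned a >= b the result is a - b,
// and otherwise the subtraction cancels to 0, which is the saturating result.)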
3710SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3711 if (N->getOpcode() != ISD::SUB ||
3712 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3713 return SDValue();
3714
3715 EVT SubVT = N->getValueType(0);
3716 SDValue Op0 = N->getOperand(0);
3717 SDValue Op1 = N->getOperand(1);
3718
3719 // Try to find umax(a,b) - b or a - umin(a,b) patterns;
3720 // they may be converted to usubsat(a,b).
3721 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3722 SDValue MaxLHS = Op0.getOperand(0);
3723 SDValue MaxRHS = Op0.getOperand(1);
3724 if (MaxLHS == Op1)
3725 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3726 if (MaxRHS == Op1)
3727 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3728 }
3729
3730 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3731 SDValue MinLHS = Op1.getOperand(0);
3732 SDValue MinRHS = Op1.getOperand(1);
3733 if (MinLHS == Op0)
3734 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3735 if (MinRHS == Op0)
3736 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3737 }
3738
3739 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3740 if (Op1.getOpcode() == ISD::TRUNCATE &&
3741 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3742 Op1.getOperand(0).hasOneUse()) {
3743 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3744 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3745 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3746 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3747 DAG, DL);
3748 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3749 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3750 DAG, DL);
3751 }
3752
3753 return SDValue();
3754}
3755
3756 // Since it may not be valid to emit a fold to zero for vector initializers,
3757// check if we can before folding.
3758static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3759 SelectionDAG &DAG, bool LegalOperations) {
3760 if (!VT.isVector())
3761 return DAG.getConstant(0, DL, VT);
3762 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3763 return DAG.getConstant(0, DL, VT);
3764 return SDValue();
3765}
3766
3767SDValue DAGCombiner::visitSUB(SDNode *N) {
3768 SDValue N0 = N->getOperand(0);
3769 SDValue N1 = N->getOperand(1);
3770 EVT VT = N0.getValueType();
3771 unsigned BitWidth = VT.getScalarSizeInBits();
3772 SDLoc DL(N);
3773
3774 auto PeekThroughFreeze = [](SDValue N) {
3775 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3776 return N->getOperand(0);
3777 return N;
3778 };
3779
3780 // fold (sub x, x) -> 0
3781 // FIXME: Refactor this and xor and other similar operations together.
3782 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3783 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3784
3785 // fold (sub c1, c2) -> c3
3786 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3787 return C;
3788
3789 // fold vector ops
3790 if (VT.isVector()) {
3791 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3792 return FoldedVOp;
3793
3794 // fold (sub x, 0) -> x, vector edition
3795 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3796 return N0;
3797 }
3798
3799 if (SDValue NewSel = foldBinOpIntoSelect(N))
3800 return NewSel;
3801
3802 // fold (sub x, c) -> (add x, -c)
3803 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3804 return DAG.getNode(ISD::ADD, DL, VT, N0,
3805 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3806
3807 if (isNullOrNullSplat(N0)) {
3808 // Right-shifting everything out but the sign bit followed by negation is
3809 // the same as flipping arithmetic/logical shift type without the negation:
3810 // -(X >>u 31) -> (X >>s 31)
3811 // -(X >>s 31) -> (X >>u 31)
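 // ((X >>u 31) is 0 or 1 and (X >>s 31) is 0 or -1; negating either value
 // produces the other, so the negation is absorbed by switching the shift kind.)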
3812 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3813 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3814 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3815 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3816 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3817 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3818 }
3819 }
3820
3821 // 0 - X --> 0 if the sub is NUW.
3822 if (N->getFlags().hasNoUnsignedWrap())
3823 return N0;
3824
3825 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3826 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3827 // N1 must be 0 because negating the minimum signed value is undefined.
3828 if (N->getFlags().hasNoSignedWrap())
3829 return N0;
3830
3831 // 0 - X --> X if X is 0 or the minimum signed value.
3832 return N1;
3833 }
3834
3835 // Convert 0 - abs(x).
3836 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3837 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3838 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3839 return Result;
3840
3841 // Fold neg(splat(neg(x)) -> splat(x)
3842 if (VT.isVector()) {
3843 SDValue N1S = DAG.getSplatValue(N1, true);
3844 if (N1S && N1S.getOpcode() == ISD::SUB &&
3845 isNullConstant(N1S.getOperand(0)))
3846 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3847 }
3848 }
3849
3850 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3851 if (isAllOnesOrAllOnesSplat(N0))
3852 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3853
3854 // fold (A - (0-B)) -> A+B
3855 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3856 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3857
3858 // fold A-(A-B) -> B
3859 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3860 return N1.getOperand(1);
3861
3862 // fold (A+B)-A -> B
3863 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3864 return N0.getOperand(1);
3865
3866 // fold (A+B)-B -> A
3867 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3868 return N0.getOperand(0);
3869
3870 // fold (A+C1)-C2 -> A+(C1-C2)
3871 if (N0.getOpcode() == ISD::ADD) {
3872 SDValue N01 = N0.getOperand(1);
3873 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3874 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3875 }
3876
3877 // fold C2-(A+C1) -> (C2-C1)-A
3878 if (N1.getOpcode() == ISD::ADD) {
3879 SDValue N11 = N1.getOperand(1);
3880 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3881 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3882 }
3883
3884 // fold (A-C1)-C2 -> A-(C1+C2)
3885 if (N0.getOpcode() == ISD::SUB) {
3886 SDValue N01 = N0.getOperand(1);
3887 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3888 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3889 }
3890
3891 // fold (c1-A)-c2 -> (c1-c2)-A
3892 if (N0.getOpcode() == ISD::SUB) {
3893 SDValue N00 = N0.getOperand(0);
3894 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3895 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3896 }
3897
3898 SDValue A, B, C;
3899
3900 // fold ((A+(B+C))-B) -> A+C
3901 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3902 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3903
3904 // fold ((A+(B-C))-B) -> A-C
3905 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3906 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3907
3908 // fold ((A-(B-C))-C) -> A-B
3909 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3910 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3911
3912 // fold (A-(B-C)) -> A+(C-B)
3913 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3914 return DAG.getNode(ISD::ADD, DL, VT, N0,
3915 DAG.getNode(ISD::SUB, DL, VT, C, B));
3916
3917 // A - (A & B) -> A & (~B)
3918 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3919 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3920 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3921
3922 // fold (A - (-B * C)) -> (A + (B * C))
3923 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3924 return DAG.getNode(ISD::ADD, DL, VT, N0,
3925 DAG.getNode(ISD::MUL, DL, VT, B, C));
3926
3927 // If either operand of a sub is undef, the result is undef
3928 if (N0.isUndef())
3929 return N0;
3930 if (N1.isUndef())
3931 return N1;
3932
3933 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3934 return V;
3935
3936 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3937 return V;
3938
3939 // Try to match AVGCEIL fixedwidth pattern
3940 if (SDValue V = foldSubToAvg(N, DL))
3941 return V;
3942
3943 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3944 return V;
3945
3946 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3947 return V;
3948
3949 // (A - B) - 1 -> add (xor B, -1), A
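 // ((xor B, -1) == -B - 1, so A + (xor B, -1) == A - B - 1.)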
3950 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3951 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3952
3953 // Look for:
3954 // sub y, (xor x, -1)
3955 // And if the target does not like this form then turn into:
3956 // add (add x, y), 1
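 // ((xor x, -1) == -x - 1, so y - (xor x, -1) == (x + y) + 1.)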
3957 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3958 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3959 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3960 }
3961
3962 // Hoist one-use addition by non-opaque constant:
3963 // (x + C) - y -> (x - y) + C
3964 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
3965 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3966 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3967 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3968 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3969 }
3970 // y - (x + C) -> (y - x) - C
3971 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3972 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3973 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3974 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3975 }
3976 // (x - C) - y -> (x - y) - C
3977 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3978 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3979 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3980 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3981 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3982 }
3983 // (C - x) - y -> C - (x + y)
3984 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3985 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3986 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3987 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3988 }
3989
3990 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3991 // rather than 'sub 0/1' (the sext should get folded).
3992 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
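 // ((zext i1 Y) is 0 or 1 and (sext i1 Y) is 0 or -1, so subtracting the former
 // equals adding the latter; e.g. Y = 1 gives X - 1 == X + (-1).)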
3993 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3994 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3995 TLI.getBooleanContents(VT) ==
3996 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3997 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3998 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3999 }
4000
4001 // fold B = sra (A, size(A)-1); sub (xor (A, B), B) -> (abs A)
4002 if ((!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4003 sd_match(N1, m_Sra(m_Value(A), m_SpecificInt(BitWidth - 1))) &&
4004 sd_match(N0, m_Xor(m_Specific(A), m_Specific(N1))))
4005 return DAG.getNode(ISD::ABS, DL, VT, A);
4006
4007 // If the relocation model supports it, consider symbol offsets.
4008 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4009 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4010 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4011 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4012 if (GA->getGlobal() == GB->getGlobal())
4013 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4014 DL, VT);
4015 }
4016
4017 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4018 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4019 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4020 if (TN->getVT() == MVT::i1) {
4021 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4022 DAG.getConstant(1, DL, VT));
4023 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4024 }
4025 }
4026
4027 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4028 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4029 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4030 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4031 }
4032
4033 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4034 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4035 APInt NewStep = -N1.getConstantOperandAPInt(0);
4036 return DAG.getNode(ISD::ADD, DL, VT, N0,
4037 DAG.getStepVector(DL, VT, NewStep));
4038 }
4039
4040 // Prefer an add for more folding potential and possibly better codegen:
4041 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4042 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4043 SDValue ShAmt = N1.getOperand(1);
4044 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4045 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4046 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4047 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4048 }
4049 }
4050
4051 // As with the previous fold, prefer add for more folding potential.
4052 // Subtracting SMIN/0 is the same as adding SMIN/0:
4053 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4054 if (N1.getOpcode() == ISD::SHL) {
4055 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4056 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4057 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4058 }
4059
4060 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4061 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4062 N0.getResNo() == 0 && N0.hasOneUse())
4063 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4064 N0.getOperand(0), N1, N0.getOperand(2));
4065
4066 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4067 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4068 if (SDValue Carry = getAsCarry(TLI, N0)) {
4069 SDValue X = N1;
4070 SDValue Zero = DAG.getConstant(0, DL, VT);
4071 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4072 return DAG.getNode(ISD::UADDO_CARRY, DL,
4073 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4074 Carry);
4075 }
4076 }
4077
4078 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4079 // sub C0, X --> xor X, C0
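 // For example (illustrative): with C0 = 0b1010 and X known to be a submask of
 // C0 (only bits 1 and 3 possibly set), each set bit of X clears the matching
 // bit of C0 with no borrow, e.g. 0b1010 - 0b1000 == 0b0010 == 0b1010 ^ 0b1000.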
4080 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4081 if (!C0->isOpaque()) {
4082 const APInt &C0Val = C0->getAPIntValue();
4083 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4084 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4085 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4086 }
4087 }
4088
4089 // smax(a,b) - smin(a,b) --> abds(a,b)
4090 if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
4091 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4092 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4093 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4094
4095 // umax(a,b) - umin(a,b) --> abdu(a,b)
4096 if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
4097 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4098 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4099 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4100
4101 return SDValue();
4102}
4103
4104SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4105 unsigned Opcode = N->getOpcode();
4106 SDValue N0 = N->getOperand(0);
4107 SDValue N1 = N->getOperand(1);
4108 EVT VT = N0.getValueType();
4109 bool IsSigned = Opcode == ISD::SSUBSAT;
4110 SDLoc DL(N);
4111
4112 // fold (sub_sat x, undef) -> 0
4113 if (N0.isUndef() || N1.isUndef())
4114 return DAG.getConstant(0, DL, VT);
4115
4116 // fold (sub_sat x, x) -> 0
4117 if (N0 == N1)
4118 return DAG.getConstant(0, DL, VT);
4119
4120 // fold (sub_sat c1, c2) -> c3
4121 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4122 return C;
4123
4124 // fold vector ops
4125 if (VT.isVector()) {
4126 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4127 return FoldedVOp;
4128
4129 // fold (sub_sat x, 0) -> x, vector edition
4130 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4131 return N0;
4132 }
4133
4134 // fold (sub_sat x, 0) -> x
4135 if (isNullConstant(N1))
4136 return N0;
4137
4138 // If it cannot overflow, transform into a sub.
4139 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4140 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4141
4142 return SDValue();
4143}
4144
4145SDValue DAGCombiner::visitSUBC(SDNode *N) {
4146 SDValue N0 = N->getOperand(0);
4147 SDValue N1 = N->getOperand(1);
4148 EVT VT = N0.getValueType();
4149 SDLoc DL(N);
4150
4151 // If the flag result is dead, turn this into an SUB.
4152 if (!N->hasAnyUseOfValue(1))
4153 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4154 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4155
4156 // fold (subc x, x) -> 0 + no borrow
4157 if (N0 == N1)
4158 return CombineTo(N, DAG.getConstant(0, DL, VT),
4159 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4160
4161 // fold (subc x, 0) -> x + no borrow
4162 if (isNullConstant(N1))
4163 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4164
4165 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4166 if (isAllOnesConstant(N0))
4167 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4168 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4169
4170 return SDValue();
4171}
4172
4173SDValue DAGCombiner::visitSUBO(SDNode *N) {
4174 SDValue N0 = N->getOperand(0);
4175 SDValue N1 = N->getOperand(1);
4176 EVT VT = N0.getValueType();
4177 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4178
4179 EVT CarryVT = N->getValueType(1);
4180 SDLoc DL(N);
4181
4182 // If the flag result is dead, turn this into an SUB.
4183 if (!N->hasAnyUseOfValue(1))
4184 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4185 DAG.getUNDEF(CarryVT));
4186
4187 // fold (subo x, x) -> 0 + no borrow
4188 if (N0 == N1)
4189 return CombineTo(N, DAG.getConstant(0, DL, VT),
4190 DAG.getConstant(0, DL, CarryVT));
4191
4192 // fold (subo x, c) -> (addo x, -c)
4193 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4194 if (IsSigned && !N1C->isMinSignedValue())
4195 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4196 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4197
4198 // fold (subo x, 0) -> x + no borrow
4199 if (isNullOrNullSplat(N1))
4200 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4201
4202 // If it cannot overflow, transform into a sub.
4203 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4204 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4205 DAG.getConstant(0, DL, CarryVT));
4206
4207 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4208 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4209 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4210 DAG.getConstant(0, DL, CarryVT));
4211
4212 return SDValue();
4213}
4214
4215SDValue DAGCombiner::visitSUBE(SDNode *N) {
4216 SDValue N0 = N->getOperand(0);
4217 SDValue N1 = N->getOperand(1);
4218 SDValue CarryIn = N->getOperand(2);
4219
4220 // fold (sube x, y, false) -> (subc x, y)
4221 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4222 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4223
4224 return SDValue();
4225}
4226
4227SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4228 SDValue N0 = N->getOperand(0);
4229 SDValue N1 = N->getOperand(1);
4230 SDValue CarryIn = N->getOperand(2);
4231
4232 // fold (usubo_carry x, y, false) -> (usubo x, y)
4233 if (isNullConstant(CarryIn)) {
4234 if (!LegalOperations ||
4235 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4236 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4237 }
4238
4239 return SDValue();
4240}
4241
4242SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4243 SDValue N0 = N->getOperand(0);
4244 SDValue N1 = N->getOperand(1);
4245 SDValue CarryIn = N->getOperand(2);
4246
4247 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4248 if (isNullConstant(CarryIn)) {
4249 if (!LegalOperations ||
4250 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4251 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4252 }
4253
4254 return SDValue();
4255}
4256
4257// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4258// UMULFIXSAT here.
4259SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4260 SDValue N0 = N->getOperand(0);
4261 SDValue N1 = N->getOperand(1);
4262 SDValue Scale = N->getOperand(2);
4263 EVT VT = N0.getValueType();
4264
4265 // fold (mulfix x, undef, scale) -> 0
4266 if (N0.isUndef() || N1.isUndef())
4267 return DAG.getConstant(0, SDLoc(N), VT);
4268
4269 // Canonicalize constant to RHS (vector doesn't have to splat)
4270 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4271 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4272 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4273
4274 // fold (mulfix x, 0, scale) -> 0
4275 if (isNullConstant(N1))
4276 return DAG.getConstant(0, SDLoc(N), VT);
4277
4278 return SDValue();
4279}
4280
4281template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
4282 SDValue N0 = N->getOperand(0);
4283 SDValue N1 = N->getOperand(1);
4284 EVT VT = N0.getValueType();
4285 unsigned BitWidth = VT.getScalarSizeInBits();
4286 SDLoc DL(N);
4287 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
4288 MatchContextClass Matcher(DAG, TLI, N);
4289
4290 // fold (mul x, undef) -> 0
4291 if (N0.isUndef() || N1.isUndef())
4292 return DAG.getConstant(0, DL, VT);
4293
4294 // fold (mul c1, c2) -> c1*c2
4295 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4296 return C;
4297
4298 // canonicalize constant to RHS (vector doesn't have to splat)
4299 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4300 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4301 return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
4302
4303 bool N1IsConst = false;
4304 bool N1IsOpaqueConst = false;
4305 APInt ConstValue1;
4306
4307 // fold vector ops
4308 if (VT.isVector()) {
4309 // TODO: Change this to use SimplifyVBinOp when it supports VP op.
4310 if (!UseVP)
4311 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4312 return FoldedVOp;
4313
4314 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4315 assert((!N1IsConst || ConstValue1.getBitWidth() == BitWidth) &&
4316 "Splat APInt should be element width");
4317 } else {
4318 N1IsConst = isa<ConstantSDNode>(N1);
4319 if (N1IsConst) {
4320 ConstValue1 = N1->getAsAPIntVal();
4321 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4322 }
4323 }
4324
4325 // fold (mul x, 0) -> 0
4326 if (N1IsConst && ConstValue1.isZero())
4327 return N1;
4328
4329 // fold (mul x, 1) -> x
4330 if (N1IsConst && ConstValue1.isOne())
4331 return N0;
4332
4333 if (!UseVP)
4334 if (SDValue NewSel = foldBinOpIntoSelect(N))
4335 return NewSel;
4336
4337 // fold (mul x, -1) -> 0-x
4338 if (N1IsConst && ConstValue1.isAllOnes())
4339 return Matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4340
4341 // fold (mul x, (1 << c)) -> x << c
4342 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4343 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4344 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4345 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4346 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4347 return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
4348 }
4349 }
4350
4351 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4352 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4353 unsigned Log2Val = (-ConstValue1).logBase2();
4354
4355 // FIXME: If the input is something that is easily negated (e.g. a
4356 // single-use add), we should put the negate there.
4357 return Matcher.getNode(
4358 ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4359 Matcher.getNode(ISD::SHL, DL, VT, N0,
4360 DAG.getShiftAmountConstant(Log2Val, VT, DL)));
4361 }
4362
4363 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4364 // hi result is in use in case we hit this mid-legalization.
4365 if (!UseVP) {
4366 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4367 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4368 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4369 // TODO: Can we match commutable operands with getNodeIfExists?
4370 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4371 if (LoHi->hasAnyUseOfValue(1))
4372 return SDValue(LoHi, 0);
4373 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4374 if (LoHi->hasAnyUseOfValue(1))
4375 return SDValue(LoHi, 0);
4376 }
4377 }
4378 }
4379
4380 // Try to transform:
4381 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4382 // mul x, (2^N + 1) --> add (shl x, N), x
4383 // mul x, (2^N - 1) --> sub (shl x, N), x
4384 // Examples: x * 33 --> (x << 5) + x
4385 // x * 15 --> (x << 4) - x
4386 // x * -33 --> -((x << 5) + x)
4387 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4388 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4389 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4390 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4391 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4392 // x * 0xf800 --> (x << 16) - (x << 11)
4393 // x * -0x8800 --> -((x << 15) + (x << 11))
4394 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4395 if (!UseVP && N1IsConst &&
4396 TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4397 // TODO: We could handle more general decomposition of any constant by
4398 // having the target set a limit on the number of ops and making a
4399 // callback to determine that sequence (similar to sqrt expansion).
4400 unsigned MathOp = ISD::DELETED_NODE;
4401 APInt MulC = ConstValue1.abs();
4402 // The constant `2` should be treated as (2^0 + 1).
4403 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4404 MulC.lshrInPlace(TZeros);
4405 if ((MulC - 1).isPowerOf2())
4406 MathOp = ISD::ADD;
4407 else if ((MulC + 1).isPowerOf2())
4408 MathOp = ISD::SUB;
4409
4410 if (MathOp != ISD::DELETED_NODE) {
4411 unsigned ShAmt =
4412 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4413 ShAmt += TZeros;
4414 assert(ShAmt < BitWidth &&
4415 "multiply-by-constant generated out of bounds shift");
4416 SDValue Shl =
4417 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4418 SDValue R =
4419 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4420 DAG.getNode(ISD::SHL, DL, VT, N0,
4421 DAG.getConstant(TZeros, DL, VT)))
4422 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4423 if (ConstValue1.isNegative())
4424 R = DAG.getNegative(R, DL, VT);
4425 return R;
4426 }
4427 }
4428
4429 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4430 if (sd_context_match(N0, Matcher, m_Opc(ISD::SHL))) {
4431 SDValue N01 = N0.getOperand(1);
4432 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4433 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4434 }
4435
4436 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4437 // use.
4438 {
4439 SDValue Sh, Y;
4440
4441 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4442 if (sd_context_match(N0, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4443 isConstantOrConstantVector(N0.getOperand(1))) {
4444 Sh = N0; Y = N1;
4445 } else if (sd_context_match(N1, Matcher, m_OneUse(m_Opc(ISD::SHL))) &&
4446 isConstantOrConstantVector(N1.getOperand(1))) {
4447 Sh = N1; Y = N0;
4448 }
4449
4450 if (Sh.getNode()) {
4451 SDValue Mul = Matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4452 return Matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4453 }
4454 }
4455
4456 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4457 if (sd_context_match(N0, Matcher, m_Opc(ISD::ADD)) &&
4458 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4459 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4460 isMulAddWithConstProfitable(N, N0, N1))
4461 return Matcher.getNode(
4462 ISD::ADD, DL, VT,
4463 Matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4464 Matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4465
4466 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4467 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4468 if (!UseVP && N0.getOpcode() == ISD::VSCALE && NC1) {
4469 const APInt &C0 = N0.getConstantOperandAPInt(0);
4470 const APInt &C1 = NC1->getAPIntValue();
4471 return DAG.getVScale(DL, VT, C0 * C1);
4472 }
4473
4474 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4475 APInt MulVal;
4476 if (!UseVP && N0.getOpcode() == ISD::STEP_VECTOR &&
4477 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4478 const APInt &C0 = N0.getConstantOperandAPInt(0);
4479 APInt NewStep = C0 * MulVal;
4480 return DAG.getStepVector(DL, VT, NewStep);
4481 }
4482
4483 // Fold Y = sra (X, size(X)-1); mul (or (Y, 1), X) -> (abs X)
4484 SDValue X;
4485 if (!UseVP && (!LegalOperations || hasOperation(ISD::ABS, VT)) &&
4486 sd_context_match(
4487 N, Matcher,
4488 m_Mul(m_Or(m_Sra(m_Value(X), m_SpecificInt(BitWidth - 1)), m_One()),
4489 m_Deferred(X)))) {
4490 return Matcher.getNode(ISD::ABS, DL, VT, X);
4491 }
4492
4493 // Fold ((mul x, 0/undef) -> 0,
4494 // (mul x, 1) -> x
4495 // -> and(x, mask)
4496 // We can replace vectors with '0' and '1' factors with a clearing mask.
4497 if (VT.isFixedLengthVector()) {
4498 unsigned NumElts = VT.getVectorNumElements();
4499 SmallBitVector ClearMask;
4500 ClearMask.reserve(NumElts);
4501 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4502 if (!V || V->isZero()) {
4503 ClearMask.push_back(true);
4504 return true;
4505 }
4506 ClearMask.push_back(false);
4507 return V->isOne();
4508 };
4509 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4510 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4511 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4512 EVT LegalSVT = N1.getOperand(0).getValueType();
4513 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4514 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4515 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4516 for (unsigned I = 0; I != NumElts; ++I)
4517 if (ClearMask[I])
4518 Mask[I] = Zero;
4519 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4520 }
4521 }
4522
4523 // reassociate mul
4524 // TODO: Change reassociateOps to support vp ops.
4525 if (!UseVP)
4526 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4527 return RMUL;
4528
4529 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4530 // TODO: Change reassociateReduction to support vp ops.
4531 if (!UseVP)
4532 if (SDValue SD =
4533 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4534 return SD;
4535
4536 // Simplify the operands using demanded-bits information.
4537 if (SimplifyDemandedBits(SDValue(N, 0)))
4538 return SDValue(N, 0);
4539
4540 return SDValue();
4541}
4542
4543/// Return true if divmod libcall is available.
4544static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4545 const TargetLowering &TLI) {
4546 RTLIB::Libcall LC;
4547 EVT NodeType = Node->getValueType(0);
4548 if (!NodeType.isSimple())
4549 return false;
4550 switch (NodeType.getSimpleVT().SimpleTy) {
4551 default: return false; // No libcall for vector types.
4552 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4553 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4554 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4555 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4556 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4557 }
4558
4559 return TLI.getLibcallName(LC) != nullptr;
4560}
4561
4562/// Issue divrem if both quotient and remainder are needed.
4563SDValue DAGCombiner::useDivRem(SDNode *Node) {
4564 if (Node->use_empty())
4565 return SDValue(); // This is a dead node, leave it alone.
4566
4567 unsigned Opcode = Node->getOpcode();
4568 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4569 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4570
4571 // DivMod lib calls can still work on non-legal types if using lib-calls.
4572 EVT VT = Node->getValueType(0);
4573 if (VT.isVector() || !VT.isInteger())
4574 return SDValue();
4575
4576 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4577 return SDValue();
4578
4579 // If DIVREM is going to get expanded into a libcall,
4580 // but there is no libcall available, then don't combine.
4581 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4582 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4583 return SDValue();
4584
4585 // If div is legal, it's better to do the normal expansion
4586 unsigned OtherOpcode = 0;
4587 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4588 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4589 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4590 return SDValue();
4591 } else {
4592 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4593 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4594 return SDValue();
4595 }
4596
4597 SDValue Op0 = Node->getOperand(0);
4598 SDValue Op1 = Node->getOperand(1);
4599 SDValue combined;
4600 for (SDNode *User : Op0->uses()) {
4601 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4602 User->use_empty())
4603 continue;
4604 // Convert the other matching node(s), too;
4605 // otherwise, the DIVREM may get target-legalized into something
4606 // target-specific that we won't be able to recognize.
4607 unsigned UserOpc = User->getOpcode();
4608 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4609 User->getOperand(0) == Op0 &&
4610 User->getOperand(1) == Op1) {
4611 if (!combined) {
4612 if (UserOpc == OtherOpcode) {
4613 SDVTList VTs = DAG.getVTList(VT, VT);
4614 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4615 } else if (UserOpc == DivRemOpc) {
4616 combined = SDValue(User, 0);
4617 } else {
4618 assert(UserOpc == Opcode);
4619 continue;
4620 }
4621 }
4622 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4623 CombineTo(User, combined);
4624 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4625 CombineTo(User, combined.getValue(1));
4626 }
4627 }
4628 return combined;
4629}
4630
4631static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4632 SDValue N0 = N->getOperand(0);
4633 SDValue N1 = N->getOperand(1);
4634 EVT VT = N->getValueType(0);
4635 SDLoc DL(N);
4636
4637 unsigned Opc = N->getOpcode();
4638 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4639 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4640
4641 // X / undef -> undef
4642 // X % undef -> undef
4643 // X / 0 -> undef
4644 // X % 0 -> undef
4645 // NOTE: This includes vectors where any divisor element is zero/undef.
4646 if (DAG.isUndef(Opc, {N0, N1}))
4647 return DAG.getUNDEF(VT);
4648
4649 // undef / X -> 0
4650 // undef % X -> 0
4651 if (N0.isUndef())
4652 return DAG.getConstant(0, DL, VT);
4653
4654 // 0 / X -> 0
4655 // 0 % X -> 0
4656 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4657 if (N0C && N0C->isZero())
4658 return N0;
4659
4660 // X / X -> 1
4661 // X % X -> 0
4662 if (N0 == N1)
4663 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4664
4665 // X / 1 -> X
4666 // X % 1 -> 0
4667 // If this is a boolean op (single-bit element type), we can't have
4668 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4669 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4670 // it's a 1.
4671 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4672 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4673
4674 return SDValue();
4675}
4676
4677SDValue DAGCombiner::visitSDIV(SDNode *N) {
4678 SDValue N0 = N->getOperand(0);
4679 SDValue N1 = N->getOperand(1);
4680 EVT VT = N->getValueType(0);
4681 EVT CCVT = getSetCCResultType(VT);
4682 SDLoc DL(N);
4683
4684 // fold (sdiv c1, c2) -> c1/c2
4685 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4686 return C;
4687
4688 // fold vector ops
4689 if (VT.isVector())
4690 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4691 return FoldedVOp;
4692
4693 // fold (sdiv X, -1) -> 0-X
4694 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4695 if (N1C && N1C->isAllOnes())
4696 return DAG.getNegative(N0, DL, VT);
4697
4698 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4699 if (N1C && N1C->isMinSignedValue())
4700 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4701 DAG.getConstant(1, DL, VT),
4702 DAG.getConstant(0, DL, VT));
4703
4704 if (SDValue V = simplifyDivRem(N, DAG))
4705 return V;
4706
4707 if (SDValue NewSel = foldBinOpIntoSelect(N))
4708 return NewSel;
4709
4710 // If we know the sign bits of both operands are zero, strength reduce to a
4711 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4712 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4713 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4714
4715 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4716 // If the corresponding remainder node exists, update its users with
4717 // (Dividend - (Quotient * Divisor).
4718 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4719 { N0, N1 })) {
4720 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4721 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4722 AddToWorklist(Mul.getNode());
4723 AddToWorklist(Sub.getNode());
4724 CombineTo(RemNode, Sub);
4725 }
4726 return V;
4727 }
4728
4729 // sdiv, srem -> sdivrem
4730 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4731 // true. Otherwise, we break the simplification logic in visitREM().
4732 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4733 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4734 if (SDValue DivRem = useDivRem(N))
4735 return DivRem;
4736
4737 return SDValue();
4738}
4739
4740static bool isDivisorPowerOfTwo(SDValue Divisor) {
4741 // Helper for determining whether a value is a power-2 constant scalar or a
4742 // vector of such elements.
4743 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4744 if (C->isZero() || C->isOpaque())
4745 return false;
4746 if (C->getAPIntValue().isPowerOf2())
4747 return true;
4748 if (C->getAPIntValue().isNegatedPowerOf2())
4749 return true;
4750 return false;
4751 };
4752
4753 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4754}
4755
4756SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4757 SDLoc DL(N);
4758 EVT VT = N->getValueType(0);
4759 EVT CCVT = getSetCCResultType(VT);
4760 unsigned BitWidth = VT.getScalarSizeInBits();
4761
4762 // fold (sdiv X, pow2) -> simple ops after legalize
4763 // FIXME: We check for the exact bit here because the generic lowering gives
4764 // better results in that case. The target-specific lowering should learn how
4765 // to handle exact sdivs efficiently.
4766 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4767 // Target-specific implementation of sdiv x, pow2.
4768 if (SDValue Res = BuildSDIVPow2(N))
4769 return Res;
4770
4771 // Create constants that are functions of the shift amount value.
4772 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4773 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4774 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4775 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4776 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4777 if (!isConstantOrConstantVector(Inexact))
4778 return SDValue();
4779
4780 // Splat the sign bit into the register
4781 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4782 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4783 AddToWorklist(Sign.getNode());
4784
4785 // Add (N0 < 0) ? abs2 - 1 : 0;
4786 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4787 AddToWorklist(Srl.getNode());
4788 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4789 AddToWorklist(Add.getNode());
4790 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4791 AddToWorklist(Sra.getNode());
4792
4793 // Special case: (sdiv X, 1) -> X
4794 // Special Case: (sdiv X, -1) -> 0-X
4795 SDValue One = DAG.getConstant(1, DL, VT);
4796 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4797 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4798 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4799 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4800 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4801
4802 // If dividing by a positive value, we're done. Otherwise, the result must
4803 // be negated.
4804 SDValue Zero = DAG.getConstant(0, DL, VT);
4805 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4806
4807 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4808 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4809 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4810 return Res;
4811 }
4812
4813 // If integer divide is expensive and we satisfy the requirements, emit an
4814 // alternate sequence. Targets may check function attributes for size/speed
4815 // trade-offs.
4816 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4817 if (isConstantOrConstantVector(N1) &&
4818 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4819 if (SDValue Op = BuildSDIV(N))
4820 return Op;
4821
4822 return SDValue();
4823}
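// [Editor's note: illustrative sketch, not part of the LLVM sources. A scalar
// model of the power-of-two expansion built above (sign splat, biased add,
// arithmetic shift), assuming 32-bit two's complement and an arithmetic right
// shift of negative values; the helper name is made up for this example.]
#include <cstdint>
constexpr int32_t SDivByPow2Model(int32_t X, unsigned Log2C) {
  int32_t Sign = X >> 31;                                  // splat the sign bit
  int32_t Bias = int32_t(uint32_t(Sign) >> (32 - Log2C));  // (2^Log2C)-1 if X < 0, else 0
  return (X + Bias) >> Log2C;                              // truncates toward zero
}
static_assert(SDivByPow2Model(-7, 2) == -7 / 4, "rounds toward zero like sdiv");
static_assert(SDivByPow2Model(7, 2) == 7 / 4, "");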
4824
4825SDValue DAGCombiner::visitUDIV(SDNode *N) {
4826 SDValue N0 = N->getOperand(0);
4827 SDValue N1 = N->getOperand(1);
4828 EVT VT = N->getValueType(0);
4829 EVT CCVT = getSetCCResultType(VT);
4830 SDLoc DL(N);
4831
4832 // fold (udiv c1, c2) -> c1/c2
4833 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4834 return C;
4835
4836 // fold vector ops
4837 if (VT.isVector())
4838 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4839 return FoldedVOp;
4840
4841 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4842 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4843 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4844 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4845 DAG.getConstant(1, DL, VT),
4846 DAG.getConstant(0, DL, VT));
4847 }
4848
4849 if (SDValue V = simplifyDivRem(N, DAG))
4850 return V;
4851
4852 if (SDValue NewSel = foldBinOpIntoSelect(N))
4853 return NewSel;
4854
4855 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4856 // If the corresponding remainder node exists, update its users with
4857 // (Dividend - (Quotient * Divisor)).
4858 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4859 { N0, N1 })) {
4860 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4861 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4862 AddToWorklist(Mul.getNode());
4863 AddToWorklist(Sub.getNode());
4864 CombineTo(RemNode, Sub);
4865 }
4866 return V;
4867 }
4868
4869 // udiv, urem -> udivrem
4870 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4871 // true. Otherwise, we break the simplification logic in visitREM().
4872 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4873 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4874 if (SDValue DivRem = useDivRem(N))
4875 return DivRem;
4876
4877 return SDValue();
4878}
4879
4880SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4881 SDLoc DL(N);
4882 EVT VT = N->getValueType(0);
4883
4884 // fold (udiv x, (1 << c)) -> x >>u c
4885 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4886 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4887 AddToWorklist(LogBase2.getNode());
4888
4889 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4890 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4891 AddToWorklist(Trunc.getNode());
4892 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4893 }
4894 }
4895
4896 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4897 if (N1.getOpcode() == ISD::SHL) {
4898 SDValue N10 = N1.getOperand(0);
4899 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4900 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4901 AddToWorklist(LogBase2.getNode());
4902
4903 EVT ADDVT = N1.getOperand(1).getValueType();
4904 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4905 AddToWorklist(Trunc.getNode());
4906 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4907 AddToWorklist(Add.getNode());
4908 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4909 }
4910 }
4911 }
4912
4913 // fold (udiv x, c) -> alternate
4914 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4915 if (isConstantOrConstantVector(N1) &&
4916 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4917 if (SDValue Op = BuildUDIV(N))
4918 return Op;
4919
4920 return SDValue();
4921}
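// [Editor's note: illustrative sketch, not part of the LLVM sources. It checks
// the two shift identities used above on scalars: dividing by 1 << c is a
// logical right shift, and dividing by (c << y) with c a power of two shifts
// by log2(c) + y. The helper names are made up for this example.]
constexpr unsigned UDivByPow2Model(unsigned X, unsigned Log2C) {
  return X >> Log2C; // udiv x, (1 << Log2C)
}
constexpr unsigned UDivByShlModel(unsigned X, unsigned Log2C, unsigned Y) {
  return X >> (Log2C + Y); // udiv x, (c << y) with c == 1 << Log2C
}
static_assert(UDivByPow2Model(100, 3) == 100u / 8u, "");
static_assert(UDivByShlModel(100, 2, 1) == 100u / (4u << 1), "");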
4922
4923SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4924 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4925 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4926 // Target-specific implementation of srem x, pow2.
4927 if (SDValue Res = BuildSREMPow2(N))
4928 return Res;
4929 }
4930 return SDValue();
4931}
4932
4933// handles ISD::SREM and ISD::UREM
4934SDValue DAGCombiner::visitREM(SDNode *N) {
4935 unsigned Opcode = N->getOpcode();
4936 SDValue N0 = N->getOperand(0);
4937 SDValue N1 = N->getOperand(1);
4938 EVT VT = N->getValueType(0);
4939 EVT CCVT = getSetCCResultType(VT);
4940
4941 bool isSigned = (Opcode == ISD::SREM);
4942 SDLoc DL(N);
4943
4944 // fold (rem c1, c2) -> c1%c2
4945 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4946 return C;
4947
4948 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4949 // Freeze the numerator to avoid a miscompile with an undefined value.
4950 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4951 CCVT.isVector() == VT.isVector()) {
4952 SDValue F0 = DAG.getFreeze(N0);
4953 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4954 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4955 }
4956
4957 if (SDValue V = simplifyDivRem(N, DAG))
4958 return V;
4959
4960 if (SDValue NewSel = foldBinOpIntoSelect(N))
4961 return NewSel;
4962
4963 if (isSigned) {
4964 // If we know the sign bits of both operands are zero, strength reduce to a
4965 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4966 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4967 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4968 } else {
4969 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4970 // fold (urem x, pow2) -> (and x, pow2-1)
4971 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4972 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4973 AddToWorklist(Add.getNode());
4974 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4975 }
4976 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4977 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4978 // TODO: We should sink the following into isKnownToBePowerOfTwo
4979 // using an OrZero parameter analogous to our handling in ValueTracking.
4980 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4981 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4982 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4983 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4984 AddToWorklist(Add.getNode());
4985 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4986 }
4987 }
4988
4989 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4990
4991 // If X/C can be simplified by the division-by-constant logic, lower
4992 // X%C to the equivalent of X-X/C*C.
4993 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4994 // speculative DIV must not cause a DIVREM conversion. We guard against this
4995 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4996 // combine will not return a DIVREM. Regardless, checking cheapness here
4997 // makes sense since the simplification results in fatter code.
4998 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4999 if (isSigned) {
5000 // check if we can build faster implementation for srem
5001 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5002 return OptimizedRem;
5003 }
5004
5005 SDValue OptimizedDiv =
5006 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5007 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5008 // If the equivalent Div node also exists, update its users.
5009 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5010 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5011 { N0, N1 }))
5012 CombineTo(DivNode, OptimizedDiv);
5013 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5014 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5015 AddToWorklist(OptimizedDiv.getNode());
5016 AddToWorklist(Mul.getNode());
5017 return Sub;
5018 }
5019 }
5020
5021 // sdiv/udiv, srem/urem -> sdivrem/udivrem
5022 if (SDValue DivRem = useDivRem(N))
5023 return DivRem.getValue(1);
5024
5025 return SDValue();
5026}
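// [Editor's note: illustrative sketch, not part of the LLVM sources. The
// rewrite above lowers X % C to X - (X / C) * C once the division itself has
// been strength-reduced; these checks restate that identity on scalars.]
constexpr int SRemViaDivModel(int X, int C) { return X - (X / C) * C; }
static_assert(SRemViaDivModel(-7, 3) == -7 % 3, "matches srem");
static_assert(SRemViaDivModel(22, 5) == 22 % 5, "");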
5027
5028SDValue DAGCombiner::visitMULHS(SDNode *N) {
5029 SDValue N0 = N->getOperand(0);
5030 SDValue N1 = N->getOperand(1);
5031 EVT VT = N->getValueType(0);
5032 SDLoc DL(N);
5033
5034 // fold (mulhs c1, c2)
5035 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5036 return C;
5037
5038 // canonicalize constant to RHS.
5039 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5040 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5041 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5042
5043 if (VT.isVector()) {
5044 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5045 return FoldedVOp;
5046
5047 // fold (mulhs x, 0) -> 0
5048 // do not return N1, because undef node may exist.
5049 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5050 return DAG.getConstant(0, DL, VT);
5051 }
5052
5053 // fold (mulhs x, 0) -> 0
5054 if (isNullConstant(N1))
5055 return N1;
5056
5057 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5058 if (isOneConstant(N1))
5059 return DAG.getNode(
5060 ISD::SRA, DL, VT, N0,
5061 DAG.getShiftAmountConstant(N0.getScalarValueSizeInBits() - 1, VT, DL));
5062
5063 // fold (mulhs x, undef) -> 0
5064 if (N0.isUndef() || N1.isUndef())
5065 return DAG.getConstant(0, DL, VT);
5066
5067 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5068 // plus a shift.
5069 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5070 !VT.isVector()) {
5071 MVT Simple = VT.getSimpleVT();
5072 unsigned SimpleSize = Simple.getSizeInBits();
5073 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5074 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5075 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5076 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5077 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5078 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5079 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5080 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5081 }
5082 }
5083
5084 return SDValue();
5085}
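// [Editor's note: illustrative sketch, not part of the LLVM sources. It models
// the widening transform above for i32: sign-extend to 64 bits, multiply, and
// take the high half. The helper name is made up for this example.]
#include <cstdint>
constexpr int32_t MulhsModel(int32_t A, int32_t B) {
  return int32_t((int64_t(A) * int64_t(B)) >> 32); // high 32 bits of the product
}
static_assert(MulhsModel(INT32_MIN, 2) == -1, "high half of -2^32");
static_assert(MulhsModel(1 << 20, 1 << 20) == 1 << 8, "high half of 2^40");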
5086
5087SDValue DAGCombiner::visitMULHU(SDNode *N) {
5088 SDValue N0 = N->getOperand(0);
5089 SDValue N1 = N->getOperand(1);
5090 EVT VT = N->getValueType(0);
5091 SDLoc DL(N);
5092
5093 // fold (mulhu c1, c2)
5094 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5095 return C;
5096
5097 // canonicalize constant to RHS.
5098 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5099 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5100 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5101
5102 if (VT.isVector()) {
5103 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5104 return FoldedVOp;
5105
5106 // fold (mulhu x, 0) -> 0
5107 // do not return N1, because undef node may exist.
5108 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5109 return DAG.getConstant(0, DL, VT);
5110 }
5111
5112 // fold (mulhu x, 0) -> 0
5113 if (isNullConstant(N1))
5114 return N1;
5115
5116 // fold (mulhu x, 1) -> 0
5117 if (isOneConstant(N1))
5118 return DAG.getConstant(0, DL, VT);
5119
5120 // fold (mulhu x, undef) -> 0
5121 if (N0.isUndef() || N1.isUndef())
5122 return DAG.getConstant(0, DL, VT);
5123
5124 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5125 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5126 hasOperation(ISD::SRL, VT)) {
5127 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5128 unsigned NumEltBits = VT.getScalarSizeInBits();
5129 SDValue SRLAmt = DAG.getNode(
5130 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5131 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5132 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5133 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5134 }
5135 }
5136
5137 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5138 // plus a shift.
5139 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5140 !VT.isVector()) {
5141 MVT Simple = VT.getSimpleVT();
5142 unsigned SimpleSize = Simple.getSizeInBits();
5143 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5144 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5145 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5146 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5147 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5148 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5149 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5150 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5151 }
5152 }
5153
5154 // Simplify the operands using demanded-bits information.
5155 // We don't have demanded bits support for MULHU so this just enables constant
5156 // folding based on known bits.
5157 if (SimplifyDemandedBits(SDValue(N, 0)))
5158 return SDValue(N, 0);
5159
5160 return SDValue();
5161}
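// [Editor's note: illustrative sketch, not part of the LLVM sources. It checks
// the "mulhu x, (1 << c) -> x >> (bitwidth - c)" fold above for i32, assuming
// 0 < c < 32. The helper name is made up for this example.]
#include <cstdint>
constexpr uint32_t MulhuPow2Model(uint32_t X, unsigned C) {
  return X >> (32 - C); // high 32 bits of X * 2^C
}
static_assert(MulhuPow2Model(0x80000000u, 1) == 1u, "0x80000000 * 2 has high half 1");
static_assert(MulhuPow2Model(0xFFFFFFFFu, 4) == 0xFu, "");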
5162
5163SDValue DAGCombiner::visitAVG(SDNode *N) {
5164 unsigned Opcode = N->getOpcode();
5165 SDValue N0 = N->getOperand(0);
5166 SDValue N1 = N->getOperand(1);
5167 EVT VT = N->getValueType(0);
5168 SDLoc DL(N);
5169 bool IsSigned = Opcode == ISD::AVGCEILS || Opcode == ISD::AVGFLOORS;
5170
5171 // fold (avg c1, c2)
5172 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5173 return C;
5174
5175 // canonicalize constant to RHS.
5176 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5177 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5178 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5179
5180 if (VT.isVector())
5181 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5182 return FoldedVOp;
5183
5184 // fold (avg x, undef) -> x
5185 if (N0.isUndef())
5186 return N1;
5187 if (N1.isUndef())
5188 return N0;
5189
5190 // fold (avg x, x) --> x
5191 if (N0 == N1 && Level >= AfterLegalizeTypes)
5192 return N0;
5193
5194 // fold (avgfloor x, 0) -> x >> 1
5195 SDValue X, Y;
5196 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORS, m_Value(X), m_Zero())))
5197 return DAG.getNode(ISD::SRA, DL, VT, X,
5198 DAG.getShiftAmountConstant(1, VT, DL));
5199 if (sd_match(N, m_c_BinOp(ISD::AVGFLOORU, m_Value(X), m_Zero())))
5200 return DAG.getNode(ISD::SRL, DL, VT, X,
5201 DAG.getShiftAmountConstant(1, VT, DL));
5202
5203 // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
5204 // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y))
5205 if (!IsSigned &&
5206 sd_match(N, m_BinOp(Opcode, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) &&
5207 X.getValueType() == Y.getValueType() &&
5208 hasOperation(Opcode, X.getValueType())) {
5209 SDValue AvgU = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5210 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgU);
5211 }
5212 if (IsSigned &&
5213 sd_match(N, m_BinOp(Opcode, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
5214 X.getValueType() == Y.getValueType() &&
5215 hasOperation(Opcode, X.getValueType())) {
5216 SDValue AvgS = DAG.getNode(Opcode, DL, X.getValueType(), X, Y);
5217 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgS);
5218 }
5219
5220 // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0
5221 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0
5222 // Check if avgflooru isn't legal/custom but avgceilu is.
5223 if (Opcode == ISD::AVGFLOORU && !hasOperation(ISD::AVGFLOORU, VT) &&
5224 (!LegalOperations || hasOperation(ISD::AVGCEILU, VT))) {
5225 if (DAG.isKnownNeverZero(N1))
5226 return DAG.getNode(
5227 ISD::AVGCEILU, DL, VT, N0,
5228 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getAllOnesConstant(DL, VT)));
5229 if (DAG.isKnownNeverZero(N0))
5230 return DAG.getNode(
5231 ISD::AVGCEILU, DL, VT, N1,
5232 DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getAllOnesConstant(DL, VT)));
5233 }
5234
5235 return SDValue();
5236}
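// [Editor's note: illustrative sketch, not part of the LLVM sources. It checks
// the avgflooru(x, y) -> avgceilu(x, y - 1) rewrite above on scalars (valid
// whenever y != 0, so the decrement cannot wrap). Names are made up.]
#include <cstdint>
constexpr uint32_t AvgFloorUModel(uint32_t X, uint32_t Y) {
  return uint32_t((uint64_t(X) + Y) >> 1); // the wide sum cannot overflow
}
constexpr uint32_t AvgCeilUModel(uint32_t X, uint32_t Y) {
  return uint32_t((uint64_t(X) + Y + 1) >> 1);
}
static_assert(AvgFloorUModel(5, 8) == AvgCeilUModel(5, 8 - 1), "");
static_assert(AvgFloorUModel(0xFFFFFFFFu, 1) == AvgCeilUModel(0xFFFFFFFFu, 0), "");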
5237
5238SDValue DAGCombiner::visitABD(SDNode *N) {
5239 unsigned Opcode = N->getOpcode();
5240 SDValue N0 = N->getOperand(0);
5241 SDValue N1 = N->getOperand(1);
5242 EVT VT = N->getValueType(0);
5243 SDLoc DL(N);
5244
5245 // fold (abd c1, c2)
5246 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5247 return C;
5248
5249 // canonicalize constant to RHS.
5250 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5251 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5252 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5253
5254 if (VT.isVector())
5255 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5256 return FoldedVOp;
5257
5258 // fold (abd x, undef) -> 0
5259 if (N0.isUndef() || N1.isUndef())
5260 return DAG.getConstant(0, DL, VT);
5261
5262 // fold (abd x, x) -> 0
5263 if (N0 == N1)
5264 return DAG.getConstant(0, DL, VT);
5265
5266 SDValue X;
5267
5268 // fold (abds x, 0) -> abs x
5269 if (sd_match(N, m_c_BinOp(ISD::ABDS, m_Value(X), m_Zero())) &&
5270 (!LegalOperations || hasOperation(ISD::ABS, VT)))
5271 return DAG.getNode(ISD::ABS, DL, VT, X);
5272
5273 // fold (abdu x, 0) -> x
5274 if (sd_match(N, m_c_BinOp(ISD::ABDU, m_Value(X), m_Zero())))
5275 return X;
5276
5277 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5278 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5279 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5280 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5281
5282 return SDValue();
5283}
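// [Editor's note: illustrative sketch, not part of the LLVM sources. When both
// sign bits are known zero, signed and unsigned absolute difference agree,
// which is what the abds -> abdu fold above relies on. Names are made up.]
constexpr unsigned AbduModel(unsigned A, unsigned B) { return A > B ? A - B : B - A; }
constexpr int AbdsModel(int A, int B) { return A > B ? A - B : B - A; }
static_assert(AbduModel(7, 20) == unsigned(AbdsModel(7, 20)), "");
static_assert(AbduModel(1000, 3) == unsigned(AbdsModel(1000, 3)), "");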
5284
5285/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5286/// give the opcodes for the two computations that are being performed. Return
5287/// true if a simplification was made.
5288SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5289 unsigned HiOp) {
5290 // If the high half is not needed, just compute the low half.
5291 bool HiExists = N->hasAnyUseOfValue(1);
5292 if (!HiExists && (!LegalOperations ||
5293 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5294 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5295 return CombineTo(N, Res, Res);
5296 }
5297
5298 // If the low half is not needed, just compute the high half.
5299 bool LoExists = N->hasAnyUseOfValue(0);
5300 if (!LoExists && (!LegalOperations ||
5301 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5302 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5303 return CombineTo(N, Res, Res);
5304 }
5305
5306 // If both halves are used, return as it is.
5307 if (LoExists && HiExists)
5308 return SDValue();
5309
5310 // If the two computed results can be simplified separately, separate them.
5311 if (LoExists) {
5312 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5313 AddToWorklist(Lo.getNode());
5314 SDValue LoOpt = combine(Lo.getNode());
5315 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5316 (!LegalOperations ||
5317 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5318 return CombineTo(N, LoOpt, LoOpt);
5319 }
5320
5321 if (HiExists) {
5322 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5323 AddToWorklist(Hi.getNode());
5324 SDValue HiOpt = combine(Hi.getNode());
5325 if (HiOpt.getNode() && HiOpt != Hi &&
5326 (!LegalOperations ||
5327 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5328 return CombineTo(N, HiOpt, HiOpt);
5329 }
5330
5331 return SDValue();
5332}
5333
5334SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5335 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5336 return Res;
5337
5338 SDValue N0 = N->getOperand(0);
5339 SDValue N1 = N->getOperand(1);
5340 EVT VT = N->getValueType(0);
5341 SDLoc DL(N);
5342
5343 // Constant fold.
5344 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5345 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5346
5347 // canonicalize constant to RHS (vector doesn't have to splat)
5348 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5349 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5350 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5351
5352 // If the type twice as wide is legal, transform this into a wider
5353 // multiply plus a shift.
5354 if (VT.isSimple() && !VT.isVector()) {
5355 MVT Simple = VT.getSimpleVT();
5356 unsigned SimpleSize = Simple.getSizeInBits();
5357 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5358 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5359 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5360 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5361 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5362 // Compute the high part as N1.
5363 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5364 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5365 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5366 // Compute the low part as N0.
5367 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5368 return CombineTo(N, Lo, Hi);
5369 }
5370 }
5371
5372 return SDValue();
5373}
5374
5375SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5376 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5377 return Res;
5378
5379 SDValue N0 = N->getOperand(0);
5380 SDValue N1 = N->getOperand(1);
5381 EVT VT = N->getValueType(0);
5382 SDLoc DL(N);
5383
5384 // Constant fold.
5385 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5386 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5387
5388 // canonicalize constant to RHS (vector doesn't have to splat)
5389 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5390 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5391 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5392
5393 // (umul_lohi N0, 0) -> (0, 0)
5394 if (isNullConstant(N1)) {
5395 SDValue Zero = DAG.getConstant(0, DL, VT);
5396 return CombineTo(N, Zero, Zero);
5397 }
5398
5399 // (umul_lohi N0, 1) -> (N0, 0)
5400 if (isOneConstant(N1)) {
5401 SDValue Zero = DAG.getConstant(0, DL, VT);
5402 return CombineTo(N, N0, Zero);
5403 }
5404
5405 // If the type twice as wide is legal, transform this into a wider
5406 // multiply plus a shift.
5407 if (VT.isSimple() && !VT.isVector()) {
5408 MVT Simple = VT.getSimpleVT();
5409 unsigned SimpleSize = Simple.getSizeInBits();
5410 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5411 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5412 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5413 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5414 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5415 // Compute the high part as N1.
5416 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5417 DAG.getShiftAmountConstant(SimpleSize, NewVT, DL));
5418 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5419 // Compute the low part as N0.
5420 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5421 return CombineTo(N, Lo, Hi);
5422 }
5423 }
5424
5425 return SDValue();
5426}
5427
5428SDValue DAGCombiner::visitMULO(SDNode *N) {
5429 SDValue N0 = N->getOperand(0);
5430 SDValue N1 = N->getOperand(1);
5431 EVT VT = N0.getValueType();
5432 bool IsSigned = (ISD::SMULO == N->getOpcode());
5433
5434 EVT CarryVT = N->getValueType(1);
5435 SDLoc DL(N);
5436
5437 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5438 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5439
5440 // fold operation with constant operands.
5441 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5442 // multiple results.
5443 if (N0C && N1C) {
5444 bool Overflow;
5445 APInt Result =
5446 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5447 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5448 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5449 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5450 }
5451
5452 // canonicalize constant to RHS.
5453 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5454 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5455 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5456
5457 // fold (mulo x, 0) -> 0 + no carry out
5458 if (isNullOrNullSplat(N1))
5459 return CombineTo(N, DAG.getConstant(0, DL, VT),
5460 DAG.getConstant(0, DL, CarryVT));
5461
5462 // (mulo x, 2) -> (addo x, x)
5463 // FIXME: This needs a freeze.
5464 if (N1C && N1C->getAPIntValue() == 2 &&
5465 (!IsSigned || VT.getScalarSizeInBits() > 2))
5466 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5467 N->getVTList(), N0, N0);
5468
5469 // A 1 bit SMULO overflows if both inputs are 1.
5470 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5471 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5472 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5473 DAG.getConstant(0, DL, VT), ISD::SETNE);
5474 return CombineTo(N, And, Cmp);
5475 }
5476
5477 // If it cannot overflow, transform into a mul.
5478 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5479 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5480 DAG.getConstant(0, DL, CarryVT));
5481 return SDValue();
5482}
5483
5484// Function to calculate whether the Min/Max pair of SDNodes (potentially
5485// swapped around) make a signed saturate pattern, clamping to between a signed
5486// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5487// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5488// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5489// same as SimplifySelectCC. N0<N1 ? N2 : N3.
5490 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5491 SDValue N3, ISD::CondCode CC, unsigned &BW,
5492 bool &Unsigned, SelectionDAG &DAG) {
5493 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5494 ISD::CondCode CC) {
5495 // The compare and select operand should be the same or the select operands
5496 // should be truncated versions of the comparison.
5497 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5498 return 0;
5499 // The constants need to be the same or a truncated version of each other.
5500 ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5501 ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5502 if (!N1C || !N3C)
5503 return 0;
5504 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5505 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5506 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5507 return 0;
5508 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5509 };
5510
5511 // Check the initial value is a SMIN/SMAX equivalent.
5512 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5513 if (!Opcode0)
5514 return SDValue();
5515
5516 // We could only need one range check, if the fptosi could never produce
5517 // the upper value.
5518 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5519 if (isNullOrNullSplat(N3)) {
5520 EVT IntVT = N0.getValueType().getScalarType();
5521 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5522 if (FPVT.isSimple()) {
5523 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5524 const fltSemantics &Semantics = InputTy->getFltSemantics();
5525 uint32_t MinBitWidth =
5526 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5527 if (IntVT.getSizeInBits() >= MinBitWidth) {
5528 Unsigned = true;
5529 BW = PowerOf2Ceil(MinBitWidth);
5530 return N0;
5531 }
5532 }
5533 }
5534 }
5535
5536 SDValue N00, N01, N02, N03;
5537 ISD::CondCode N0CC;
5538 switch (N0.getOpcode()) {
5539 case ISD::SMIN:
5540 case ISD::SMAX:
5541 N00 = N02 = N0.getOperand(0);
5542 N01 = N03 = N0.getOperand(1);
5543 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5544 break;
5545 case ISD::SELECT_CC:
5546 N00 = N0.getOperand(0);
5547 N01 = N0.getOperand(1);
5548 N02 = N0.getOperand(2);
5549 N03 = N0.getOperand(3);
5550 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5551 break;
5552 case ISD::SELECT:
5553 case ISD::VSELECT:
5554 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5555 return SDValue();
5556 N00 = N0.getOperand(0).getOperand(0);
5557 N01 = N0.getOperand(0).getOperand(1);
5558 N02 = N0.getOperand(1);
5559 N03 = N0.getOperand(2);
5560 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5561 break;
5562 default:
5563 return SDValue();
5564 }
5565
5566 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5567 if (!Opcode1 || Opcode0 == Opcode1)
5568 return SDValue();
5569
5570 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5571 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5572 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5573 return SDValue();
5574
5575 const APInt &MinC = MinCOp->getAPIntValue();
5576 const APInt &MaxC = MaxCOp->getAPIntValue();
5577 APInt MinCPlus1 = MinC + 1;
5578 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5579 BW = MinCPlus1.exactLogBase2() + 1;
5580 Unsigned = false;
5581 return N02;
5582 }
5583
5584 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5585 BW = MinCPlus1.exactLogBase2();
5586 Unsigned = true;
5587 return N02;
5588 }
5589
5590 return SDValue();
5591}
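// [Editor's note: illustrative sketch, not part of the LLVM sources. The
// helper above recognizes a min/max clamp to [-2^(BW-1), 2^(BW-1)-1] as a
// signed saturate; this scalar model shows the BW == 8 case. Names are made up.]
constexpr int SignedSatClampModel(int X) {
  int Lo = X > 127 ? 127 : X;   // smin(X, 2^7 - 1)
  return Lo < -128 ? -128 : Lo; // smax(..., -2^7)
}
static_assert(SignedSatClampModel(300) == 127, "saturates to the i8 maximum");
static_assert(SignedSatClampModel(-300) == -128, "saturates to the i8 minimum");
static_assert(SignedSatClampModel(5) == 5, "in-range values pass through");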
5592
5593 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5594 SDValue N3, ISD::CondCode CC,
5595 SelectionDAG &DAG) {
5596 unsigned BW;
5597 bool Unsigned;
5598 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5599 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5600 return SDValue();
5601 EVT FPVT = Fp.getOperand(0).getValueType();
5602 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5603 if (FPVT.isVector())
5604 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5605 FPVT.getVectorElementCount());
5606 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5607 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5608 return SDValue();
5609 SDLoc DL(Fp);
5610 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5611 DAG.getValueType(NewVT.getScalarType()));
5612 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5613}
5614
5615 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5616 SDValue N3, ISD::CondCode CC,
5617 SelectionDAG &DAG) {
5618 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5619 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5620 // be truncated versions of the setcc (N0/N1).
5621 if ((N0 != N2 &&
5622 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5623 N0.getOpcode() != ISD::FP_TO_UINT)
5624 return SDValue();
5625 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5626 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5627 if (!N1C || !N3C)
5628 return SDValue();
5629 const APInt &C1 = N1C->getAPIntValue();
5630 const APInt &C3 = N3C->getAPIntValue();
5631 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5632 C1 != C3.zext(C1.getBitWidth()))
5633 return SDValue();
5634
5635 unsigned BW = (C1 + 1).exactLogBase2();
5636 EVT FPVT = N0.getOperand(0).getValueType();
5637 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5638 if (FPVT.isVector())
5639 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5640 FPVT.getVectorElementCount());
5641 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5642 FPVT, NewVT))
5643 return SDValue();
5644
5645 SDValue Sat =
5646 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5647 DAG.getValueType(NewVT.getScalarType()));
5648 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5649}
5650
5651SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5652 SDValue N0 = N->getOperand(0);
5653 SDValue N1 = N->getOperand(1);
5654 EVT VT = N0.getValueType();
5655 unsigned Opcode = N->getOpcode();
5656 SDLoc DL(N);
5657
5658 // fold operation with constant operands.
5659 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5660 return C;
5661
5662 // If the operands are the same, this is a no-op.
5663 if (N0 == N1)
5664 return N0;
5665
5666 // canonicalize constant to RHS
5667 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5668 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5669 return DAG.getNode(Opcode, DL, VT, N1, N0);
5670
5671 // fold vector ops
5672 if (VT.isVector())
5673 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5674 return FoldedVOp;
5675
5676 // reassociate minmax
5677 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5678 return RMINMAX;
5679
5680 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5681 // Only do this if:
5682 // 1. The current op isn't legal and the flipped is.
5683 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5684 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5685 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5686 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5687 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5688 unsigned AltOpcode;
5689 switch (Opcode) {
5690 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5691 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5692 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5693 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5694 default: llvm_unreachable("Unknown MINMAX opcode");
5695 }
5696 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5697 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5698 }
5699
5700 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5701 if (SDValue S = PerformMinMaxFpToSatCombine(
5702 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5703 return S;
5704 if (Opcode == ISD::UMIN)
5705 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5706 return S;
5707
5708 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5709 auto ReductionOpcode = [](unsigned Opcode) {
5710 switch (Opcode) {
5711 case ISD::SMIN:
5712 return ISD::VECREDUCE_SMIN;
5713 case ISD::SMAX:
5714 return ISD::VECREDUCE_SMAX;
5715 case ISD::UMIN:
5716 return ISD::VECREDUCE_UMIN;
5717 case ISD::UMAX:
5718 return ISD::VECREDUCE_UMAX;
5719 default:
5720 llvm_unreachable("Unexpected opcode");
5721 }
5722 };
5723 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5724 SDLoc(N), VT, N0, N1))
5725 return SD;
5726
5727 // Simplify the operands using demanded-bits information.
5728 if (SimplifyDemandedBits(SDValue(N, 0)))
5729 return SDValue(N, 0);
5730
5731 return SDValue();
5732}
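// [Editor's note: illustrative sketch, not part of the LLVM sources. The
// signed/unsigned flip above is justified because operands with clear sign
// bits order the same way under signed and unsigned comparison.]
constexpr unsigned UMinModel(unsigned A, unsigned B) { return A < B ? A : B; }
constexpr int SMinModel(int A, int B) { return A < B ? A : B; }
static_assert(UMinModel(3u, 10u) == unsigned(SMinModel(3, 10)), "");
static_assert(UMinModel(0x7FFFFFFFu, 42u) == unsigned(SMinModel(0x7FFFFFFF, 42)), "");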
5733
5734/// If this is a bitwise logic instruction and both operands have the same
5735/// opcode, try to sink the other opcode after the logic instruction.
5736SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5737 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5738 EVT VT = N0.getValueType();
5739 unsigned LogicOpcode = N->getOpcode();
5740 unsigned HandOpcode = N0.getOpcode();
5741 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5742 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5743
5744 // Bail early if none of these transforms apply.
5745 if (N0.getNumOperands() == 0)
5746 return SDValue();
5747
5748 // FIXME: We should check number of uses of the operands to not increase
5749 // the instruction count for all transforms.
5750
5751 // Handle size-changing casts (or sign_extend_inreg).
5752 SDValue X = N0.getOperand(0);
5753 SDValue Y = N1.getOperand(0);
5754 EVT XVT = X.getValueType();
5755 SDLoc DL(N);
5756 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5757 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5758 N0.getOperand(1) == N1.getOperand(1))) {
5759 // If both operands have other uses, this transform would create extra
5760 // instructions without eliminating anything.
5761 if (!N0.hasOneUse() && !N1.hasOneUse())
5762 return SDValue();
5763 // We need matching integer source types.
5764 if (XVT != Y.getValueType())
5765 return SDValue();
5766 // Don't create an illegal op during or after legalization. Don't ever
5767 // create an unsupported vector op.
5768 if ((VT.isVector() || LegalOperations) &&
5769 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5770 return SDValue();
5771 // Avoid infinite looping with PromoteIntBinOp.
5772 // TODO: Should we apply desirable/legal constraints to all opcodes?
5773 if ((HandOpcode == ISD::ANY_EXTEND ||
5774 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5775 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5776 return SDValue();
5777 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5778 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5779 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5780 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5781 return DAG.getNode(HandOpcode, DL, VT, Logic);
5782 }
5783
5784 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5785 if (HandOpcode == ISD::TRUNCATE) {
5786 // If both operands have other uses, this transform would create extra
5787 // instructions without eliminating anything.
5788 if (!N0.hasOneUse() && !N1.hasOneUse())
5789 return SDValue();
5790 // We need matching source types.
5791 if (XVT != Y.getValueType())
5792 return SDValue();
5793 // Don't create an illegal op during or after legalization.
5794 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5795 return SDValue();
5796 // Be extra careful sinking truncate. If it's free, there's no benefit in
5797 // widening a binop. Also, don't create a logic op on an illegal type.
5798 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5799 return SDValue();
5800 if (!TLI.isTypeLegal(XVT))
5801 return SDValue();
5802 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5803 return DAG.getNode(HandOpcode, DL, VT, Logic);
5804 }
5805
5806 // For binops SHL/SRL/SRA/AND:
5807 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5808 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5809 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5810 N0.getOperand(1) == N1.getOperand(1)) {
5811 // If either operand has other uses, this transform is not an improvement.
5812 if (!N0.hasOneUse() || !N1.hasOneUse())
5813 return SDValue();
5814 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5815 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5816 }
5817
5818 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5819 if (HandOpcode == ISD::BSWAP) {
5820 // If either operand has other uses, this transform is not an improvement.
5821 if (!N0.hasOneUse() || !N1.hasOneUse())
5822 return SDValue();
5823 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5824 return DAG.getNode(HandOpcode, DL, VT, Logic);
5825 }
5826
5827 // For funnel shifts FSHL/FSHR:
5828 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5829 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5830 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5831 N0.getOperand(2) == N1.getOperand(2)) {
5832 if (!N0.hasOneUse() || !N1.hasOneUse())
5833 return SDValue();
5834 SDValue X1 = N0.getOperand(1);
5835 SDValue Y1 = N1.getOperand(1);
5836 SDValue S = N0.getOperand(2);
5837 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5838 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5839 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5840 }
5841
5842 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5843 // Only perform this optimization up until type legalization, before
5844 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5845 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5846 // we don't want to undo this promotion.
5847 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5848 // on scalars.
5849 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5850 Level <= AfterLegalizeTypes) {
5851 // Input types must be integer and the same.
5852 if (XVT.isInteger() && XVT == Y.getValueType() &&
5853 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5854 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5855 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5856 return DAG.getNode(HandOpcode, DL, VT, Logic);
5857 }
5858 }
5859
5860 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5861 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5862 // If both shuffles use the same mask, and both shuffle within a single
5863 // vector, then it is worthwhile to move the swizzle after the operation.
5864 // The type-legalizer generates this pattern when loading illegal
5865 // vector types from memory. In many cases this allows additional shuffle
5866 // optimizations.
5867 // There are other cases where moving the shuffle after the xor/and/or
5868 // is profitable even if shuffles don't perform a swizzle.
5869 // If both shuffles use the same mask, and both shuffles have the same first
5870 // or second operand, then it might still be profitable to move the shuffle
5871 // after the xor/and/or operation.
5872 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5873 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5874 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5875 assert(X.getValueType() == Y.getValueType() &&
5876 "Inputs to shuffles are not the same type");
5877
5878 // Check that both shuffles use the same mask. The masks are known to be of
5879 // the same length because the result vector type is the same.
5880 // Check also that shuffles have only one use to avoid introducing extra
5881 // instructions.
5882 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5883 !SVN0->getMask().equals(SVN1->getMask()))
5884 return SDValue();
5885
5886 // Don't try to fold this node if it requires introducing a
5887 // build vector of all zeros that might be illegal at this stage.
5888 SDValue ShOp = N0.getOperand(1);
5889 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5890 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5891
5892 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5893 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5894 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5895 N0.getOperand(0), N1.getOperand(0));
5896 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5897 }
5898
5899 // Don't try to fold this node if it requires introducing a
5900 // build vector of all zeros that might be illegal at this stage.
5901 ShOp = N0.getOperand(0);
5902 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5903 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5904
5905 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5906 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5907 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5908 N1.getOperand(1));
5909 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5910 }
5911 }
5912
5913 return SDValue();
5914}
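// [Editor's note: illustrative sketch, not part of the LLVM sources. One of
// the hoists above in scalar form: a logic op of two truncates equals the
// truncate of the logic op, since truncation just drops high bits.]
#include <cstdint>
constexpr uint8_t AndOfTruncModel(uint32_t X, uint32_t Y) {
  return uint8_t(X) & uint8_t(Y); // and (trunc x), (trunc y)
}
constexpr uint8_t TruncOfAndModel(uint32_t X, uint32_t Y) {
  return uint8_t(X & Y); // trunc (and x, y)
}
static_assert(AndOfTruncModel(0x1234u, 0xF0F0u) == TruncOfAndModel(0x1234u, 0xF0F0u), "");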
5915
5916/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5917SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5918 const SDLoc &DL) {
5919 SDValue LL, LR, RL, RR, N0CC, N1CC;
5920 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5921 !isSetCCEquivalent(N1, RL, RR, N1CC))
5922 return SDValue();
5923
5924 assert(N0.getValueType() == N1.getValueType() &&
5925 "Unexpected operand types for bitwise logic op");
5926 assert(LL.getValueType() == LR.getValueType() &&
5927 RL.getValueType() == RR.getValueType() &&
5928 "Unexpected operand types for setcc");
5929
5930 // If we're here post-legalization or the logic op type is not i1, the logic
5931 // op type must match a setcc result type. Also, all folds require new
5932 // operations on the left and right operands, so those types must match.
5933 EVT VT = N0.getValueType();
5934 EVT OpVT = LL.getValueType();
5935 if (LegalOperations || VT.getScalarType() != MVT::i1)
5936 if (VT != getSetCCResultType(OpVT))
5937 return SDValue();
5938 if (OpVT != RL.getValueType())
5939 return SDValue();
5940
5941 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5942 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5943 bool IsInteger = OpVT.isInteger();
5944 if (LR == RR && CC0 == CC1 && IsInteger) {
5945 bool IsZero = isNullOrNullSplat(LR);
5946 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5947
5948 // All bits clear?
5949 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5950 // All sign bits clear?
5951 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5952 // Any bits set?
5953 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5954 // Any sign bits set?
5955 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5956
5957 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5958 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5959 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5960 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5961 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5962 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5963 AddToWorklist(Or.getNode());
5964 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5965 }
5966
5967 // All bits set?
5968 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5969 // All sign bits set?
5970 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5971 // Any bits clear?
5972 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5973 // Any sign bits clear?
5974 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5975
5976 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5977 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5978 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5979 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5980 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5981 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5982 AddToWorklist(And.getNode());
5983 return DAG.getSetCC(DL, VT, And, LR, CC1);
5984 }
5985 }
5986
5987 // TODO: What is the 'or' equivalent of this fold?
5988 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5989 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5990 IsInteger && CC0 == ISD::SETNE &&
5991 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5992 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5993 SDValue One = DAG.getConstant(1, DL, OpVT);
5994 SDValue Two = DAG.getConstant(2, DL, OpVT);
5995 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5996 AddToWorklist(Add.getNode());
5997 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5998 }
5999
6000 // Try more general transforms if the predicates match and the only user of
6001 // the compares is the 'and' or 'or'.
6002 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6003 N0.hasOneUse() && N1.hasOneUse()) {
6004 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6005 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6006 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6007 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6008 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6009 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6010 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6011 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6012 }
6013
6014 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6015 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6016 // Match a shared variable operand and 2 non-opaque constant operands.
6017 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6018 // The difference of the constants must be a single bit.
6019 const APInt &CMax =
6020 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6021 const APInt &CMin =
6022 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6023 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6024 };
6025 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6026 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6027 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6028 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6029 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6030 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6031 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6032 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6033 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6034 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6035 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6036 }
6037 }
6038 }
6039
6040 // Canonicalize equivalent operands to LL == RL.
6041 if (LL == RR && LR == RL) {
6042 CC1 = ISD::getSetCCSwappedOperands(CC1);
6043 std::swap(RL, RR);
6044 }
6045
6046 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6047 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6048 if (LL == RL && LR == RR) {
6049 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6050 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6051 if (NewCC != ISD::SETCC_INVALID &&
6052 (!LegalOperations ||
6053 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6054 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6055 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6056 }
6057
6058 return SDValue();
6059}
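// [Editor's note: illustrative sketch, not part of the LLVM sources. The first
// family of folds above rests on scalar identities like these: both values are
// zero exactly when their OR is zero, and both are all-ones exactly when their
// AND is all-ones.]
constexpr bool BothZeroViaOr(unsigned X, unsigned Y) { return (X | Y) == 0u; }
static_assert(BothZeroViaOr(0u, 0u) == (0u == 0u && 0u == 0u), "");
static_assert(BothZeroViaOr(0u, 4u) == (0u == 0u && 4u == 0u), "");
constexpr bool BothOnesViaAnd(unsigned X, unsigned Y) { return (X & Y) == ~0u; }
static_assert(BothOnesViaAnd(~0u, ~0u), "");
static_assert(!BothOnesViaAnd(~0u, 1u), "");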
6060
6061static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6062 SelectionDAG &DAG) {
6063 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6064}
6065
6066static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6067 SelectionDAG &DAG) {
6068 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6069}
6070
6071static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6072 ISD::CondCode CC, unsigned OrAndOpcode,
6073 SelectionDAG &DAG,
6074 bool isFMAXNUMFMINNUM_IEEE,
6075 bool isFMAXNUMFMINNUM) {
6076 // The optimization cannot be applied for all the predicates because
6077 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6078 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6079 // applied at all if one of the operands is a signaling NaN.
6080
6081 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6082 // are non NaN values.
6083 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6084 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6085 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6086 isFMAXNUMFMINNUM_IEEE
6087 ? ISD::FMINNUM_IEEE
6088 : ISD::DELETED_NODE;
6089 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6090 (OrAndOpcode == ISD::OR)) ||
6091 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6092 (OrAndOpcode == ISD::AND)))
6093 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6094 isFMAXNUMFMINNUM_IEEE
6095 ? ISD::FMAXNUM_IEEE
6096 : ISD::DELETED_NODE;
6097 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6098 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6099 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6100 // that there are not any sNaNs, then the optimization is not valid
6101 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6102 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6103 // we can prove that we do not have any sNaNs, then we can do the
6104 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6105 // cases.
6106 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6107 (OrAndOpcode == ISD::OR)) ||
6108 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6109 (OrAndOpcode == ISD::AND)))
6110 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6111 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6112 isFMAXNUMFMINNUM_IEEE
6113 ? ISD::FMINNUM_IEEE
6114 : ISD::DELETED_NODE;
6115 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6116 (OrAndOpcode == ISD::OR)) ||
6117 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6118 (OrAndOpcode == ISD::AND)))
6119 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6120 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6121 isFMAXNUMFMINNUM_IEEE
6122 ? ISD::FMAXNUM_IEEE
6123 : ISD::DELETED_NODE;
6124 return ISD::DELETED_NODE;
6125}
6126
6127 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6128 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6129 assert(
6130 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6131 "Invalid Op to combine SETCC with");
6132
6133 // TODO: Search past casts/truncates.
6134 SDValue LHS = LogicOp->getOperand(0);
6135 SDValue RHS = LogicOp->getOperand(1);
6136 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6137 !LHS->hasOneUse() || !RHS->hasOneUse())
6138 return SDValue();
6139
6140 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6141 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6142 LogicOp, LHS.getNode(), RHS.getNode());
6143
6144 SDValue LHS0 = LHS->getOperand(0);
6145 SDValue RHS0 = RHS->getOperand(0);
6146 SDValue LHS1 = LHS->getOperand(1);
6147 SDValue RHS1 = RHS->getOperand(1);
6148 // TODO: We don't actually need a splat here, for vectors we just need the
6149 // invariants to hold for each element.
6150 auto *LHS1C = isConstOrConstSplat(LHS1);
6151 auto *RHS1C = isConstOrConstSplat(RHS1);
6152 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6153 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6154 EVT VT = LogicOp->getValueType(0);
6155 EVT OpVT = LHS0.getValueType();
6156 SDLoc DL(LogicOp);
6157
6158 // Check if the operands of an and/or operation are comparisons and if they
6159 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6160 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6161 // sequence will be replaced with min-cmp sequence:
6162 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6163 // and and-cmp-cmp will be replaced with max-cmp sequence:
6164 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6165 // The optimization does not work for `==` or `!=` .
6166 // The two comparisons should have either the same predicate or the
6167 // predicate of one of the comparisons is the opposite of the other one.
6168 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6169 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6170 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6171 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6172 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6173 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6174 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6175 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6176 (OpVT.isFloatingPoint() &&
6177 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6178 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6179 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6180 CCL != ISD::SETTRUE &&
6181 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6182
6183 SDValue CommonValue, Operand1, Operand2;
6184 ISD::CondCode CC = ISD::SETCC_INVALID;
6185 if (CCL == CCR) {
6186 if (LHS0 == RHS0) {
6187 CommonValue = LHS0;
6188 Operand1 = LHS1;
6189 Operand2 = RHS1;
6190 CC = CCL;
6191 } else if (LHS1 == RHS1) {
6192 CommonValue = LHS1;
6193 Operand1 = LHS0;
6194 Operand2 = RHS0;
6195 CC = CCL;
6196 }
6197 } else {
6198 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6199 if (LHS0 == RHS1) {
6200 CommonValue = LHS0;
6201 Operand1 = LHS1;
6202 Operand2 = RHS0;
6203 CC = CCR;
6204 } else if (RHS0 == LHS1) {
6205 CommonValue = LHS1;
6206 Operand1 = LHS0;
6207 Operand2 = RHS1;
6208 CC = CCL;
6209 }
6210 }
6211
6212 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6213 // handle it using OR/AND.
6214 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6215 CC = ISD::SETCC_INVALID;
6216 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6217 CC = ISD::SETCC_INVALID;
6218
6219 if (CC != ISD::SETCC_INVALID) {
6220 unsigned NewOpcode = ISD::DELETED_NODE;
6221 bool IsSigned = isSignedIntSetCC(CC);
6222 if (OpVT.isInteger()) {
6223 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6224 CC == ISD::SETLT || CC == ISD::SETULT);
6225 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6226 if (IsLess == IsOr)
6227 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6228 else
6229 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6230 } else if (OpVT.isFloatingPoint())
6231 NewOpcode =
6232 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6233 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6234
6235 if (NewOpcode != ISD::DELETED_NODE) {
6236 SDValue MinMaxValue =
6237 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6238 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6239 }
6240 }
6241 }
6242
6243 if (TargetPreference == AndOrSETCCFoldKind::None)
6244 return SDValue();
6245
6246 if (CCL == CCR &&
6247 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6248 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6249 const APInt &APLhs = LHS1C->getAPIntValue();
6250 const APInt &APRhs = RHS1C->getAPIntValue();
6251
6252 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6253 // case this is just a compare).
6254 if (APLhs == (-APRhs) &&
6255 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6256 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6257 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6258 // (icmp eq A, C) | (icmp eq A, -C)
6259 // -> (icmp eq Abs(A), C)
6260 // (icmp ne A, C) & (icmp ne A, -C)
6261 // -> (icmp ne Abs(A), C)
6262 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6263 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6264 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6265 } else if (TargetPreference &
6266 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6267
6268 // AndOrSETCCFoldKind::AddAnd:
6269 // A == C0 | A == C1
6270 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6271 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6272 // A != C0 & A != C1
6273 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6274 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6275
6276 // AndOrSETCCFoldKind::NotAnd:
6277 // A == C0 | A == C1
6278 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6279 // -> ~A & smin(C0, C1) == 0
6280 // A != C0 & A != C1
6281 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6282 // -> ~A & smin(C0, C1) != 0
6283
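// E.g., for C0 == 5 and C1 == 13: smin == 5, smax == 13, and their difference
// 8 is a power of two, so (A == 5) | (A == 13) folds to ((A - 5) & ~8) == 0.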
6284 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6285 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6286 APInt Dif = MaxC - MinC;
6287 if (!Dif.isZero() && Dif.isPowerOf2()) {
6288 if (MaxC.isAllOnes() &&
6289 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6290 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6291 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6292 DAG.getConstant(MinC, DL, OpVT));
6293 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6294 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6295 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6296
6297 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6298 DAG.getConstant(-MinC, DL, OpVT));
6299 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6300 DAG.getConstant(~Dif, DL, OpVT));
6301 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6302 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6303 }
6304 }
6305 }
6306 }
6307
6308 return SDValue();
6309}
6310
6311// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6312// We canonicalize to the `select` form in the middle end, but the `and` form
6313 // gets better codegen on all tested targets (arm, x86, riscv).
6314 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6315 const SDLoc &DL, SelectionDAG &DAG) {
6316 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6317 if (!isNullConstant(F))
6318 return SDValue();
6319
6320 EVT CondVT = Cond.getValueType();
6321 if (TLI.getBooleanContents(CondVT) !=
6322 TargetLowering::ZeroOrOneBooleanContent)
6323 return SDValue();
6324
6325 if (T.getOpcode() != ISD::AND)
6326 return SDValue();
6327
6328 if (!isOneConstant(T.getOperand(1)))
6329 return SDValue();
6330
6331 EVT OpVT = T.getValueType();
6332
6333 SDValue CondMask =
6334 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6335 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6336}
6337
6338/// This contains all DAGCombine rules which reduce two values combined by
6339/// an And operation to a single value. This makes them reusable in the context
6340/// of visitSELECT(). Rules involving constants are not included as
6341/// visitSELECT() already handles those cases.
6342SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6343 EVT VT = N1.getValueType();
6344 SDLoc DL(N);
6345
6346 // fold (and x, undef) -> 0
6347 if (N0.isUndef() || N1.isUndef())
6348 return DAG.getConstant(0, DL, VT);
6349
6350 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6351 return V;
6352
6353 // Canonicalize:
6354 // and(x, add) -> and(add, x)
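// Moving any ADD operand into N0 lets the (add x, c1) / (lshr y, c2) fold
// below match regardless of the original operand order.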
6355 if (N1.getOpcode() == ISD::ADD)
6356 std::swap(N0, N1);
6357
6358 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6359 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6360 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6361 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6362 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6363 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6364 // immediate for an add, but it is legal if its top c2 bits are set,
6365 // transform the ADD so the immediate doesn't need to be materialized
6366 // in a register.
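// The (lshr y, c2) operand guarantees that the top c2 bits of the AND result
// are zero, so those bits of c1 cannot affect the result and may be set
// freely to form a cheaper add immediate.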
6367 APInt ADDC = ADDI->getAPIntValue();
6368 APInt SRLC = SRLI->getAPIntValue();
6369 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6370 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6371 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6372 SRLC.getZExtValue());
6373 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6374 ADDC |= Mask;
6375 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6376 SDLoc DL0(N0);
6377 SDValue NewAdd =
6378 DAG.getNode(ISD::ADD, DL0, VT,
6379 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6380 CombineTo(N0.getNode(), NewAdd);
6381 // Return N so it doesn't get rechecked!
6382 return SDValue(N, 0);
6383 }
6384 }
6385 }
6386 }
6387 }
6388 }
6389
6390 return SDValue();
6391}
6392
6393bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6394 EVT LoadResultTy, EVT &ExtVT) {
6395 if (!AndC->getAPIntValue().isMask())
6396 return false;
6397
6398 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6399
6400 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6401 EVT LoadedVT = LoadN->getMemoryVT();
6402
6403 if (ExtVT == LoadedVT &&
6404 (!LegalOperations ||
6405 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6406 // ZEXTLOAD will match without needing to change the size of the value being
6407 // loaded.
6408 return true;
6409 }
6410
6411 // Do not change the width of volatile or atomic loads.
6412 if (!LoadN->isSimple())
6413 return false;
6414
6415 // Do not generate loads of non-round integer types since these can
6416 // be expensive (and would be wrong if the type is not byte sized).
6417 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6418 return false;
6419
6420 if (LegalOperations &&
6421 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6422 return false;
6423
6424 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6425 return false;
6426
6427 return true;
6428}
6429
6430bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6431 ISD::LoadExtType ExtType, EVT &MemVT,
6432 unsigned ShAmt) {
6433 if (!LDST)
6434 return false;
6435 // Only allow byte offsets.
6436 if (ShAmt % 8)
6437 return false;
6438
6439 // Do not generate loads of non-round integer types since these can
6440 // be expensive (and would be wrong if the type is not byte sized).
6441 if (!MemVT.isRound())
6442 return false;
6443
6444 // Don't change the width of volatile or atomic loads.
6445 if (!LDST->isSimple())
6446 return false;
6447
6448 EVT LdStMemVT = LDST->getMemoryVT();
6449
6450 // Bail out when changing the scalable property, since we can't be sure that
6451 // we're actually narrowing here.
6452 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6453 return false;
6454
6455 // Verify that we are actually reducing a load width here.
6456 if (LdStMemVT.bitsLT(MemVT))
6457 return false;
6458
6459 // Ensure that this isn't going to produce an unsupported memory access.
6460 if (ShAmt) {
6461 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6462 const unsigned ByteShAmt = ShAmt / 8;
6463 const Align LDSTAlign = LDST->getAlign();
6464 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6465 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6466 LDST->getAddressSpace(), NarrowAlign,
6467 LDST->getMemOperand()->getFlags()))
6468 return false;
6469 }
6470
6471 // It's not possible to generate a constant of extended or untyped type.
6472 EVT PtrType = LDST->getBasePtr().getValueType();
6473 if (PtrType == MVT::Untyped || PtrType.isExtended())
6474 return false;
6475
6476 if (isa<LoadSDNode>(LDST)) {
6477 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6478 // Don't transform one with multiple uses, this would require adding a new
6479 // load.
6480 if (!SDValue(Load, 0).hasOneUse())
6481 return false;
6482
6483 if (LegalOperations &&
6484 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6485 return false;
6486
6487 // For the transform to be legal, the load must produce only two values
6488 // (the value loaded and the chain). Don't transform a pre-increment
6489 // load, for example, which produces an extra value. Otherwise the
6490 // transformation is not equivalent, and the downstream logic to replace
6491 // uses gets things wrong.
6492 if (Load->getNumValues() > 2)
6493 return false;
6494
6495 // If the load that we're shrinking is an extload and we're not just
6496 // discarding the extension we can't simply shrink the load. Bail.
6497 // TODO: It would be possible to merge the extensions in some cases.
6498 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6499 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6500 return false;
6501
6502 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6503 return false;
6504 } else {
6505 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6506 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6507 // Can't write outside the original store
6508 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6509 return false;
6510
6511 if (LegalOperations &&
6512 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6513 return false;
6514 }
6515 return true;
6516}
6517
6518 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6519 SmallVectorImpl<LoadSDNode*> &Loads,
6520 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6521 ConstantSDNode *Mask,
6522 SDNode *&NodeToMask) {
6523 // Recursively search for the operands, looking for loads which can be
6524 // narrowed.
6525 for (SDValue Op : N->op_values()) {
6526 if (Op.getValueType().isVector())
6527 return false;
6528
6529 // Some constants may need fixing up later if they are too large.
6530 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6531 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6532 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6533 NodesWithConsts.insert(N);
6534 continue;
6535 }
6536
6537 if (!Op.hasOneUse())
6538 return false;
6539
6540 switch(Op.getOpcode()) {
6541 case ISD::LOAD: {
6542 auto *Load = cast<LoadSDNode>(Op);
6543 EVT ExtVT;
6544 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6545 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6546
6547 // ZEXTLOAD is already small enough.
6548 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6549 ExtVT.bitsGE(Load->getMemoryVT()))
6550 continue;
6551
6552 // Use LE to convert equal sized loads to zext.
6553 if (ExtVT.bitsLE(Load->getMemoryVT()))
6554 Loads.push_back(Load);
6555
6556 continue;
6557 }
6558 return false;
6559 }
6560 case ISD::ZERO_EXTEND:
6561 case ISD::AssertZext: {
6562 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6563 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6564 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6565 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6566 Op.getOperand(0).getValueType();
6567
6568 // We can accept extending nodes if the mask is wider or an equal
6569 // width to the original type.
6570 if (ExtVT.bitsGE(VT))
6571 continue;
6572 break;
6573 }
6574 case ISD::OR:
6575 case ISD::XOR:
6576 case ISD::AND:
6577 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6578 NodeToMask))
6579 return false;
6580 continue;
6581 }
6582
6583 // Allow one node which will be masked along with any loads found.
6584 if (NodeToMask)
6585 return false;
6586
6587 // Also ensure that the node to be masked only produces one data result.
6588 NodeToMask = Op.getNode();
6589 if (NodeToMask->getNumValues() > 1) {
6590 bool HasValue = false;
6591 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6592 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6593 if (VT != MVT::Glue && VT != MVT::Other) {
6594 if (HasValue) {
6595 NodeToMask = nullptr;
6596 return false;
6597 }
6598 HasValue = true;
6599 }
6600 }
6601 assert(HasValue && "Node to be masked has no data result?");
6602 }
6603 }
6604 return true;
6605}
6606
6607bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6608 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6609 if (!Mask)
6610 return false;
6611
6612 if (!Mask->getAPIntValue().isMask())
6613 return false;
6614
6615 // No need to do anything if the and directly uses a load.
6616 if (isa<LoadSDNode>(N->getOperand(0)))
6617 return false;
6618
6619 SmallVector<LoadSDNode*, 8> Loads;
6620 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6621 SDNode *FixupNode = nullptr;
6622 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6623 if (Loads.empty())
6624 return false;
6625
6626 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6627 SDValue MaskOp = N->getOperand(1);
6628
6629 // If it exists, fixup the single node we allow in the tree that needs
6630 // masking.
6631 if (FixupNode) {
6632 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6633 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6634 FixupNode->getValueType(0),
6635 SDValue(FixupNode, 0), MaskOp);
6636 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6637 if (And.getOpcode() == ISD::AND)
6638 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6639 }
6640
6641 // Narrow any constants that need it.
6642 for (auto *LogicN : NodesWithConsts) {
6643 SDValue Op0 = LogicN->getOperand(0);
6644 SDValue Op1 = LogicN->getOperand(1);
6645
6646 if (isa<ConstantSDNode>(Op0))
6647 Op0 =
6648 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6649
6650 if (isa<ConstantSDNode>(Op1))
6651 Op1 =
6652 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6653
6654 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6655 std::swap(Op0, Op1);
6656
6657 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6658 }
6659
6660 // Create narrow loads.
6661 for (auto *Load : Loads) {
6662 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6663 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6664 SDValue(Load, 0), MaskOp);
6665 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6666 if (And.getOpcode() == ISD::AND)
6667 And = SDValue(
6668 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6669 SDValue NewLoad = reduceLoadWidth(And.getNode());
6670 assert(NewLoad &&
6671 "Shouldn't be masking the load if it can't be narrowed");
6672 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6673 }
6674 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6675 return true;
6676 }
6677 return false;
6678}
6679
6680// Unfold
6681// x & (-1 'logical shift' y)
6682// To
6683// (x 'opposite logical shift' y) 'logical shift' y
6684// if it is better for performance.
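// E.g., x & (-1 << y) becomes (x >> y) << y: both clear the low y bits, but
// the shift pair avoids materializing the variable mask.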
6685SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6686 assert(N->getOpcode() == ISD::AND);
6687
6688 SDValue N0 = N->getOperand(0);
6689 SDValue N1 = N->getOperand(1);
6690
6691 // Do we actually prefer shifts over mask?
6692 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6693 return SDValue();
6694
6695 // Try to match (-1 '[outer] logical shift' y)
6696 unsigned OuterShift;
6697 unsigned InnerShift; // The opposite direction to the OuterShift.
6698 SDValue Y; // Shift amount.
6699 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6700 if (!M.hasOneUse())
6701 return false;
6702 OuterShift = M->getOpcode();
6703 if (OuterShift == ISD::SHL)
6704 InnerShift = ISD::SRL;
6705 else if (OuterShift == ISD::SRL)
6706 InnerShift = ISD::SHL;
6707 else
6708 return false;
6709 if (!isAllOnesConstant(M->getOperand(0)))
6710 return false;
6711 Y = M->getOperand(1);
6712 return true;
6713 };
6714
6715 SDValue X;
6716 if (matchMask(N1))
6717 X = N0;
6718 else if (matchMask(N0))
6719 X = N1;
6720 else
6721 return SDValue();
6722
6723 SDLoc DL(N);
6724 EVT VT = N->getValueType(0);
6725
6726 // tmp = x 'opposite logical shift' y
6727 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6728 // ret = tmp 'logical shift' y
6729 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6730
6731 return T1;
6732}
6733
6734/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6735/// For a target with a bit test, this is expected to become test + set and save
6736/// at least 1 instruction.
6737 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6738 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6739
6740 // Look through an optional extension.
6741 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6742 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6743 And0 = And0.getOperand(0);
6744 if (!isOneConstant(And1) || !And0.hasOneUse())
6745 return SDValue();
6746
6747 SDValue Src = And0;
6748
6749 // Attempt to find a 'not' op.
6750 // TODO: Should we favor test+set even without the 'not' op?
6751 bool FoundNot = false;
6752 if (isBitwiseNot(Src)) {
6753 FoundNot = true;
6754 Src = Src.getOperand(0);
6755
6756 // Look through an optional truncation. The source operand may not be the
6757 // same type as the original 'and', but that is ok because we are masking
6758 // off everything but the low bit.
6759 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6760 Src = Src.getOperand(0);
6761 }
6762
6763 // Match a shift-right by constant.
6764 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6765 return SDValue();
6766
6767 // This is probably not worthwhile without a supported type.
6768 EVT SrcVT = Src.getValueType();
6769 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6770 if (!TLI.isTypeLegal(SrcVT))
6771 return SDValue();
6772
6773 // We might have looked through casts that make this transform invalid.
6774 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6775 SDValue ShiftAmt = Src.getOperand(1);
6776 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6777 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6778 return SDValue();
6779
6780 // Set source to shift source.
6781 Src = Src.getOperand(0);
6782
6783 // Try again to find a 'not' op.
6784 // TODO: Should we favor test+set even with two 'not' ops?
6785 if (!FoundNot) {
6786 if (!isBitwiseNot(Src))
6787 return SDValue();
6788 Src = Src.getOperand(0);
6789 }
6790
6791 if (!TLI.hasBitTest(Src, ShiftAmt))
6792 return SDValue();
6793
6794 // Turn this into a bit-test pattern using mask op + setcc:
6795 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6796 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6797 SDLoc DL(And);
6798 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6799 EVT CCVT =
6800 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6801 SDValue Mask = DAG.getConstant(
6802 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6803 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6804 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6805 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6806 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6807}
6808
6809/// For targets that support usubsat, match a bit-hack form of that operation
6810/// that ends in 'and' and convert it.
6811 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6812 EVT VT = N->getValueType(0);
6813 unsigned BitWidth = VT.getScalarSizeInBits();
6814 APInt SignMask = APInt::getSignMask(BitWidth);
6815
6816 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6817 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6818 // xor/add with SMIN (signmask) are logically equivalent.
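// When the sign bit of X is clear, the arithmetic shift is 0 and so is the
// AND; when it is set, the shift is all-ones and X ^ 128 == X + 128 == X - 128
// (mod 256), which is exactly usubsat(X, 128) for i8.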
6819 SDValue X;
6820 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6821 m_OneUse(m_Sra(m_Deferred(X),
6822 m_SpecificInt(BitWidth - 1))))) &&
6823 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6824 m_OneUse(m_Sra(m_Deferred(X),
6825 m_SpecificInt(BitWidth - 1))))))
6826 return SDValue();
6827
6828 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6829 DAG.getConstant(SignMask, DL, VT));
6830}
6831
6832/// Given a bitwise logic operation N with a matching bitwise logic operand,
6833/// fold a pattern where 2 of the source operands are identically shifted
6834/// values. For example:
6835/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6836 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6837 SelectionDAG &DAG) {
6838 unsigned LogicOpcode = N->getOpcode();
6839 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6840 "Expected bitwise logic operation");
6841
6842 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6843 return SDValue();
6844
6845 // Match another bitwise logic op and a shift.
6846 unsigned ShiftOpcode = ShiftOp.getOpcode();
6847 if (LogicOp.getOpcode() != LogicOpcode ||
6848 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6849 ShiftOpcode == ISD::SRA))
6850 return SDValue();
6851
6852 // Match another shift op inside the first logic operand. Handle both commuted
6853 // possibilities.
6854 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6855 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6856 SDValue X1 = ShiftOp.getOperand(0);
6857 SDValue Y = ShiftOp.getOperand(1);
6858 SDValue X0, Z;
6859 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6860 LogicOp.getOperand(0).getOperand(1) == Y) {
6861 X0 = LogicOp.getOperand(0).getOperand(0);
6862 Z = LogicOp.getOperand(1);
6863 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6864 LogicOp.getOperand(1).getOperand(1) == Y) {
6865 X0 = LogicOp.getOperand(1).getOperand(0);
6866 Z = LogicOp.getOperand(0);
6867 } else {
6868 return SDValue();
6869 }
6870
6871 EVT VT = N->getValueType(0);
6872 SDLoc DL(N);
6873 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6874 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6875 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6876}
6877
6878/// Given a tree of logic operations with shape like
6879/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6880/// try to match and fold shift operations with the same shift amount.
6881/// For example:
6882/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6883/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6884 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6885 SDValue RightHand, SelectionDAG &DAG) {
6886 unsigned LogicOpcode = N->getOpcode();
6887 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6888 "Expected bitwise logic operation");
6889 if (LeftHand.getOpcode() != LogicOpcode ||
6890 RightHand.getOpcode() != LogicOpcode)
6891 return SDValue();
6892 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6893 return SDValue();
6894
6895 // Try to match one of following patterns:
6896 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6897 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6898 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6899 // itself.
6900 SDValue CombinedShifts, W;
6901 SDValue R0 = RightHand.getOperand(0);
6902 SDValue R1 = RightHand.getOperand(1);
6903 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6904 W = R1;
6905 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6906 W = R0;
6907 else
6908 return SDValue();
6909
6910 EVT VT = N->getValueType(0);
6911 SDLoc DL(N);
6912 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6913}
6914
6915SDValue DAGCombiner::visitAND(SDNode *N) {
6916 SDValue N0 = N->getOperand(0);
6917 SDValue N1 = N->getOperand(1);
6918 EVT VT = N1.getValueType();
6919 SDLoc DL(N);
6920
6921 // x & x --> x
6922 if (N0 == N1)
6923 return N0;
6924
6925 // fold (and c1, c2) -> c1&c2
6926 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6927 return C;
6928
6929 // canonicalize constant to RHS
6930 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6931 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6932 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6933
6934 if (areBitwiseNotOfEachother(N0, N1))
6935 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6936
6937 // fold vector ops
6938 if (VT.isVector()) {
6939 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6940 return FoldedVOp;
6941
6942 // fold (and x, 0) -> 0, vector edition
6943 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6944 // do not return N1, because undef node may exist in N1
6945 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6946 N1.getValueType());
6947
6948 // fold (and x, -1) -> x, vector edition
6949 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6950 return N0;
6951
6952 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6953 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6954 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6955 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6956 N1.hasOneUse()) {
6957 EVT LoadVT = MLoad->getMemoryVT();
6958 EVT ExtVT = VT;
6959 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6960 // For this AND to be a zero extension of the masked load the elements
6961 // of the BuildVec must mask the bottom bits of the extended element
6962 // type
6963 uint64_t ElementSize =
6964 LoadVT.getVectorElementType().getScalarSizeInBits();
6965 if (Splat->getAPIntValue().isMask(ElementSize)) {
6966 SDValue NewLoad = DAG.getMaskedLoad(
6967 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6968 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6969 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6970 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6971 bool LoadHasOtherUsers = !N0.hasOneUse();
6972 CombineTo(N, NewLoad);
6973 if (LoadHasOtherUsers)
6974 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6975 return SDValue(N, 0);
6976 }
6977 }
6978 }
6979 }
6980
6981 // fold (and x, -1) -> x
6982 if (isAllOnesConstant(N1))
6983 return N0;
6984
6985 // if (and x, c) is known to be zero, return 0
6986 unsigned BitWidth = VT.getScalarSizeInBits();
6987 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6988 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6989 return DAG.getConstant(0, DL, VT);
6990
6991 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6992 return R;
6993
6994 if (SDValue NewSel = foldBinOpIntoSelect(N))
6995 return NewSel;
6996
6997 // reassociate and
6998 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
6999 return RAND;
7000
7001 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7002 if (SDValue SD =
7003 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7004 return SD;
7005
7006 // fold (and (or x, C), D) -> D if (C & D) == D
7007 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7008 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7009 };
7010 if (N0.getOpcode() == ISD::OR &&
7011 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7012 return N1;
7013
7014 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7015 SDValue N0Op0 = N0.getOperand(0);
7016 EVT SrcVT = N0Op0.getValueType();
7017 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7018 APInt Mask = ~N1C->getAPIntValue();
7019 Mask = Mask.trunc(SrcBitWidth);
7020
7021 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7022 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7023 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7024
7025 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7026 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7027 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7028 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7029 TLI.isNarrowingProfitable(VT, SrcVT))
7030 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7031 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7032 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7033 }
7034
7035 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7036 if (ISD::isExtOpcode(N0.getOpcode())) {
7037 unsigned ExtOpc = N0.getOpcode();
7038 SDValue N0Op0 = N0.getOperand(0);
7039 if (N0Op0.getOpcode() == ISD::AND &&
7040 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7041 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7042 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7043 N0->hasOneUse() && N0Op0->hasOneUse()) {
7044 SDValue NewMask =
7045 DAG.getNode(ISD::AND, DL, VT, N1,
7046 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7047 return DAG.getNode(ISD::AND, DL, VT,
7048 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7049 NewMask);
7050 }
7051 }
7052
7053 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7054 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7055 // already be zero by virtue of the width of the base type of the load.
7056 //
7057 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7058 // more cases.
7059 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7061 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7062 N0.getOperand(0).getResNo() == 0) ||
7063 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7064 auto *Load =
7065 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7066
7067 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7068 // This can be a pure constant or a vector splat, in which case we treat the
7069 // vector as a scalar and use the splat value.
7070 APInt Constant = APInt::getZero(1);
7071 if (ConstantSDNode *C = isConstOrConstSplat(
7072 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7073 Constant = C->getAPIntValue();
7074 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7075 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7076 APInt SplatValue, SplatUndef;
7077 unsigned SplatBitSize;
7078 bool HasAnyUndefs;
7079 // Endianness should not matter here. Code below makes sure that we only
7080 // use the result if the SplatBitSize is a multiple of the vector element
7081 // size. And after that we AND all element sized parts of the splat
7082 // together. So the end result should be the same regardless of in which
7083 // order we do those operations.
7084 const bool IsBigEndian = false;
7085 bool IsSplat =
7086 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7087 HasAnyUndefs, EltBitWidth, IsBigEndian);
7088
7089 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7090 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
7091 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7092 // Undef bits can contribute to a possible optimisation if set, so
7093 // set them.
7094 SplatValue |= SplatUndef;
7095
7096 // The splat value may be something like "0x00FFFFFF", which means 0 for
7097 // the first vector value and FF for the rest, repeating. We need a mask
7098 // that will apply equally to all members of the vector, so AND all the
7099 // lanes of the constant together.
7100 Constant = APInt::getAllOnes(EltBitWidth);
7101 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7102 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7103 }
7104 }
7105
7106 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7107 // actually legal and isn't going to get expanded, else this is a false
7108 // optimisation.
7109 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7110 Load->getValueType(0),
7111 Load->getMemoryVT());
7112
7113 // Resize the constant to the same size as the original memory access before
7114 // extension. If it is still the AllOnesValue then this AND is completely
7115 // unneeded.
7116 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7117
7118 bool B;
7119 switch (Load->getExtensionType()) {
7120 default: B = false; break;
7121 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7122 case ISD::ZEXTLOAD:
7123 case ISD::NON_EXTLOAD: B = true; break;
7124 }
7125
7126 if (B && Constant.isAllOnes()) {
7127 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7128 // preserve semantics once we get rid of the AND.
7129 SDValue NewLoad(Load, 0);
7130
7131 // Fold the AND away. NewLoad may get replaced immediately.
7132 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7133
7134 if (Load->getExtensionType() == ISD::EXTLOAD) {
7135 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7136 Load->getValueType(0), SDLoc(Load),
7137 Load->getChain(), Load->getBasePtr(),
7138 Load->getOffset(), Load->getMemoryVT(),
7139 Load->getMemOperand());
7140 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7141 if (Load->getNumValues() == 3) {
7142 // PRE/POST_INC loads have 3 values.
7143 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7144 NewLoad.getValue(2) };
7145 CombineTo(Load, To, 3, true);
7146 } else {
7147 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7148 }
7149 }
7150
7151 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7152 }
7153 }
7154
7155 // Try to convert a constant mask AND into a shuffle clear mask.
7156 if (VT.isVector())
7157 if (SDValue Shuffle = XformToShuffleWithZero(N))
7158 return Shuffle;
7159
7160 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7161 return Combined;
7162
7163 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7164 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7165 SDValue Ext = N0.getOperand(0);
7166 EVT ExtVT = Ext->getValueType(0);
7167 SDValue Extendee = Ext->getOperand(0);
7168
7169 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7170 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7171 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7172 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7173 // => (extract_subvector (iN_zeroext v))
7174 SDValue ZeroExtExtendee =
7175 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7176
7177 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7178 N0.getOperand(1));
7179 }
7180 }
7181
7182 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7183 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7184 EVT MemVT = GN0->getMemoryVT();
7185 EVT ScalarVT = MemVT.getScalarType();
7186
7187 if (SDValue(GN0, 0).hasOneUse() &&
7188 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7189 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7190 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7191 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7192
7193 SDValue ZExtLoad = DAG.getMaskedGather(
7194 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7195 GN0->getIndexType(), ISD::ZEXTLOAD);
7196
7197 CombineTo(N, ZExtLoad);
7198 AddToWorklist(ZExtLoad.getNode());
7199 // Avoid recheck of N.
7200 return SDValue(N, 0);
7201 }
7202 }
7203
7204 // fold (and (load x), 255) -> (zextload x, i8)
7205 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7206 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7207 if (SDValue Res = reduceLoadWidth(N))
7208 return Res;
7209
7210 if (LegalTypes) {
7211 // Attempt to propagate the AND back up to the leaves which, if they're
7212 // loads, can be combined to narrow loads and the AND node can be removed.
7213 // Perform after legalization so that extend nodes will already be
7214 // combined into the loads.
7215 if (BackwardsPropagateMask(N))
7216 return SDValue(N, 0);
7217 }
7218
7219 if (SDValue Combined = visitANDLike(N0, N1, N))
7220 return Combined;
7221
7222 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7223 if (N0.getOpcode() == N1.getOpcode())
7224 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7225 return V;
7226
7227 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7228 return R;
7229 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7230 return R;
7231
7232 // Masking the negated extension of a boolean is just the zero-extended
7233 // boolean:
7234 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7235 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7236 //
7237 // Note: the SimplifyDemandedBits fold below can make an information-losing
7238 // transform, and then we have no way to find this better fold.
7239 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7240 if (isNullOrNullSplat(N0.getOperand(0))) {
7241 SDValue SubRHS = N0.getOperand(1);
7242 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7243 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7244 return SubRHS;
7245 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7246 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7247 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7248 }
7249 }
7250
7251 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7252 // fold (and (sra)) -> (and (srl)) when possible.
7253 if (SimplifyDemandedBits(SDValue(N, 0)))
7254 return SDValue(N, 0);
7255
7256 // fold (zext_inreg (extload x)) -> (zextload x)
7257 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7258 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7259 (ISD::isEXTLoad(N0.getNode()) ||
7260 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7261 auto *LN0 = cast<LoadSDNode>(N0);
7262 EVT MemVT = LN0->getMemoryVT();
7263 // If we zero all the possible extended bits, then we can turn this into
7264 // a zextload if we are running before legalize or the operation is legal.
7265 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7266 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7267 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7268 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7269 ((!LegalOperations && LN0->isSimple()) ||
7270 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7271 SDValue ExtLoad =
7272 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7273 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7274 AddToWorklist(N);
7275 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7276 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7277 }
7278 }
7279
7280 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7281 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7282 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7283 N0.getOperand(1), false))
7284 return BSwap;
7285 }
7286
7287 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7288 return Shifts;
7289
7290 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7291 return V;
7292
7293 // Recognize the following pattern:
7294 //
7295 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7296 //
7297 // where bitmask is a mask that clears the upper bits of AndVT. The
7298 // number of bits in bitmask must be a power of two.
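// E.g., (and (sign_extend i8 X to i32), 255) is equivalent to
// (zero_extend i8 X to i32).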
7299 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7300 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7301 return false;
7302
7303 auto *C = dyn_cast<ConstantSDNode>(RHS);
7304 if (!C)
7305 return false;
7306
7307 if (!C->getAPIntValue().isMask(
7308 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7309 return false;
7310
7311 return true;
7312 };
7313
7314 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7315 if (IsAndZeroExtMask(N0, N1))
7316 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7317
7318 if (hasOperation(ISD::USUBSAT, VT))
7319 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7320 return V;
7321
7322 // Postpone until legalization completed to avoid interference with bswap
7323 // folding
7324 if (LegalOperations || VT.isVector())
7325 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7326 return R;
7327
7328 return SDValue();
7329}
7330
7331/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7332SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7333 bool DemandHighBits) {
7334 if (!LegalOperations)
7335 return SDValue();
7336
7337 EVT VT = N->getValueType(0);
7338 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7339 return SDValue();
7340 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7341 return SDValue();
7342
7343 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7344 bool LookPassAnd0 = false;
7345 bool LookPassAnd1 = false;
7346 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7347 std::swap(N0, N1);
7348 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7349 std::swap(N0, N1);
7350 if (N0.getOpcode() == ISD::AND) {
7351 if (!N0->hasOneUse())
7352 return SDValue();
7353 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7354 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7355 // This is needed for X86.
7356 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7357 N01C->getZExtValue() != 0xFFFF))
7358 return SDValue();
7359 N0 = N0.getOperand(0);
7360 LookPassAnd0 = true;
7361 }
7362
7363 if (N1.getOpcode() == ISD::AND) {
7364 if (!N1->hasOneUse())
7365 return SDValue();
7366 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7367 if (!N11C || N11C->getZExtValue() != 0xFF)
7368 return SDValue();
7369 N1 = N1.getOperand(0);
7370 LookPassAnd1 = true;
7371 }
7372
7373 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7374 std::swap(N0, N1);
7375 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7376 return SDValue();
7377 if (!N0->hasOneUse() || !N1->hasOneUse())
7378 return SDValue();
7379
7380 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7381 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7382 if (!N01C || !N11C)
7383 return SDValue();
7384 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7385 return SDValue();
7386
7387 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7388 SDValue N00 = N0->getOperand(0);
7389 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7390 if (!N00->hasOneUse())
7391 return SDValue();
7392 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7393 if (!N001C || N001C->getZExtValue() != 0xFF)
7394 return SDValue();
7395 N00 = N00.getOperand(0);
7396 LookPassAnd0 = true;
7397 }
7398
7399 SDValue N10 = N1->getOperand(0);
7400 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7401 if (!N10->hasOneUse())
7402 return SDValue();
7403 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7404 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7405 // for X86.
7406 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7407 N101C->getZExtValue() != 0xFFFF))
7408 return SDValue();
7409 N10 = N10.getOperand(0);
7410 LookPassAnd1 = true;
7411 }
7412
7413 if (N00 != N10)
7414 return SDValue();
7415
7416 // Make sure everything beyond the low halfword gets set to zero since the SRL
7417 // 16 will clear the top bits.
7418 unsigned OpSizeInBits = VT.getSizeInBits();
7419 if (OpSizeInBits > 16) {
7420 // If the left-shift isn't masked out then the only way this is a bswap is
7421 // if all bits beyond the low 8 are 0. In that case the entire pattern
7422 // reduces to a left shift anyway: leave it for other parts of the combiner.
7423 if (DemandHighBits && !LookPassAnd0)
7424 return SDValue();
7425
7426 // However, if the right shift isn't masked out then it might be because
7427 // it's not needed. See if we can spot that too. If the high bits aren't
7428 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7429 // upper bits to be zero.
7430 if (!LookPassAnd1) {
7431 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7432 if (!DAG.MaskedValueIsZero(N10,
7433 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7434 return SDValue();
7435 }
7436 }
7437
7438 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7439 if (OpSizeInBits > 16) {
7440 SDLoc DL(N);
7441 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7442 DAG.getShiftAmountConstant(OpSizeInBits - 16, VT, DL));
7443 }
7444 return Res;
7445}
7446
7447/// Return true if the specified node is an element that makes up a 32-bit
7448/// packed halfword byteswap.
7449/// ((x & 0x000000ff) << 8) |
7450/// ((x & 0x0000ff00) >> 8) |
7451/// ((x & 0x00ff0000) << 8) |
7452/// ((x & 0xff000000) >> 8)
7453 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7454 if (!N->hasOneUse())
7455 return false;
7456
7457 unsigned Opc = N.getOpcode();
7458 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7459 return false;
7460
7461 SDValue N0 = N.getOperand(0);
7462 unsigned Opc0 = N0.getOpcode();
7463 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7464 return false;
7465
7466 ConstantSDNode *N1C = nullptr;
7467 // SHL or SRL: look upstream for AND mask operand
7468 if (Opc == ISD::AND)
7469 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7470 else if (Opc0 == ISD::AND)
7471 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7472 if (!N1C)
7473 return false;
7474
7475 unsigned MaskByteOffset;
7476 switch (N1C->getZExtValue()) {
7477 default:
7478 return false;
7479 case 0xFF: MaskByteOffset = 0; break;
7480 case 0xFF00: MaskByteOffset = 1; break;
7481 case 0xFFFF:
7482 // In case demanded bits didn't clear the bits that will be shifted out.
7483 // This is needed for X86.
7484 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7485 MaskByteOffset = 1;
7486 break;
7487 }
7488 return false;
7489 case 0xFF0000: MaskByteOffset = 2; break;
7490 case 0xFF000000: MaskByteOffset = 3; break;
7491 }
7492
7493 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7494 if (Opc == ISD::AND) {
7495 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7496 // (x >> 8) & 0xff
7497 // (x >> 8) & 0xff0000
7498 if (Opc0 != ISD::SRL)
7499 return false;
7500 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7501 if (!C || C->getZExtValue() != 8)
7502 return false;
7503 } else {
7504 // (x << 8) & 0xff00
7505 // (x << 8) & 0xff000000
7506 if (Opc0 != ISD::SHL)
7507 return false;
7508 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7509 if (!C || C->getZExtValue() != 8)
7510 return false;
7511 }
7512 } else if (Opc == ISD::SHL) {
7513 // (x & 0xff) << 8
7514 // (x & 0xff0000) << 8
7515 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7516 return false;
7517 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7518 if (!C || C->getZExtValue() != 8)
7519 return false;
7520 } else { // Opc == ISD::SRL
7521 // (x & 0xff00) >> 8
7522 // (x & 0xff000000) >> 8
7523 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7524 return false;
7525 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7526 if (!C || C->getZExtValue() != 8)
7527 return false;
7528 }
7529
7530 if (Parts[MaskByteOffset])
7531 return false;
7532
7533 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7534 return true;
7535}
7536
7537// Match 2 elements of a packed halfword bswap.
7538 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7539 if (N.getOpcode() == ISD::OR)
7540 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7541 isBSwapHWordElement(N.getOperand(1), Parts);
7542
7543 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7544 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7545 if (!C || C->getAPIntValue() != 16)
7546 return false;
7547 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7548 return true;
7549 }
7550
7551 return false;
7552}
7553
7554// Match this pattern:
7555// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7556// And rewrite this to:
7557// (rotr (bswap A), 16)
7558 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7559 SelectionDAG &DAG, SDNode *N, SDValue N0,
7560 SDValue N1, EVT VT) {
7561 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7562 "MatchBSwapHWordOrAndAnd: expecting i32");
7563 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7564 return SDValue();
7565 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7566 return SDValue();
7567 // TODO: this is too restrictive; lifting this restriction requires more tests
7568 if (!N0->hasOneUse() || !N1->hasOneUse())
7569 return SDValue();
7570 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7571 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7572 if (!Mask0 || !Mask1)
7573 return SDValue();
7574 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7575 Mask1->getAPIntValue() != 0x00ff00ff)
7576 return SDValue();
7577 SDValue Shift0 = N0.getOperand(0);
7578 SDValue Shift1 = N1.getOperand(0);
7579 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7580 return SDValue();
7581 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7582 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7583 if (!ShiftAmt0 || !ShiftAmt1)
7584 return SDValue();
7585 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7586 return SDValue();
7587 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7588 return SDValue();
7589
7590 SDLoc DL(N);
7591 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7592 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7593 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7594}
7595
7596/// Match a 32-bit packed halfword bswap. That is
7597/// ((x & 0x000000ff) << 8) |
7598/// ((x & 0x0000ff00) >> 8) |
7599/// ((x & 0x00ff0000) << 8) |
7600/// ((x & 0xff000000) >> 8)
7601/// => (rotl (bswap x), 16)
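/// E.g., x = 0xAABBCCDD produces 0xBBAADDCC, which equals
/// rotl(bswap(x) = 0xDDCCBBAA, 16).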
7602SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7603 if (!LegalOperations)
7604 return SDValue();
7605
7606 EVT VT = N->getValueType(0);
7607 if (VT != MVT::i32)
7608 return SDValue();
7609 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7610 return SDValue();
7611
7612 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT))
7613 return BSwap;
7614
7615 // Try again with commuted operands.
7616 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT))
7617 return BSwap;
7618
7619
7620 // Look for either
7621 // (or (bswaphpair), (bswaphpair))
7622 // (or (or (bswaphpair), (and)), (and))
7623 // (or (or (and), (bswaphpair)), (and))
7624 SDNode *Parts[4] = {};
7625
7626 if (isBSwapHWordPair(N0, Parts)) {
7627 // (or (or (and), (and)), (or (and), (and)))
7628 if (!isBSwapHWordPair(N1, Parts))
7629 return SDValue();
7630 } else if (N0.getOpcode() == ISD::OR) {
7631 // (or (or (or (and), (and)), (and)), (and))
7632 if (!isBSwapHWordElement(N1, Parts))
7633 return SDValue();
7634 SDValue N00 = N0.getOperand(0);
7635 SDValue N01 = N0.getOperand(1);
7636 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7637 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7638 return SDValue();
7639 } else {
7640 return SDValue();
7641 }
7642
7643 // Make sure the parts are all coming from the same node.
7644 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7645 return SDValue();
7646
7647 SDLoc DL(N);
7648 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7649 SDValue(Parts[0], 0));
7650
7651 // Result of the bswap should be rotated by 16. If it's not legal, then
7652 // do (x << 16) | (x >> 16).
7653 SDValue ShAmt = DAG.getShiftAmountConstant(16, VT, DL);
7654 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7655 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7656 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7657 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7658 return DAG.getNode(ISD::OR, DL, VT,
7659 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7660 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7661}
7662
7663/// This contains all DAGCombine rules which reduce two values combined by
7664/// an Or operation to a single value \see visitANDLike().
7665SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7666 EVT VT = N1.getValueType();
7667
7668 // fold (or x, undef) -> -1
7669 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7670 return DAG.getAllOnesConstant(DL, VT);
7671
7672 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7673 return V;
7674
7675 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
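// E.g., with C1 == 0x0F and C2 == 0xF0, if X has no bits set in 0xF0 and Y has
// none in 0x0F, then (X & 0x0F) | (Y & 0xF0) == (X | Y) & 0xFF.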
7676 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7677 // Don't increase # computations.
7678 (N0->hasOneUse() || N1->hasOneUse())) {
7679 // We can only do this xform if we know that bits from X that are set in C2
7680 // but not in C1 are already zero. Likewise for Y.
7681 if (const ConstantSDNode *N0O1C =
7682 getAsNonOpaqueConstant(N0.getOperand(1)))
7683 if (const ConstantSDNode *N1O1C =
7684 getAsNonOpaqueConstant(N1.getOperand(1))) {
7685 // We can only do this xform if we know that bits from X that are set in
7686 // C2 but not in C1 are already zero. Likewise for Y.
7687 const APInt &LHSMask = N0O1C->getAPIntValue();
7688 const APInt &RHSMask = N1O1C->getAPIntValue();
7689
7690 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7691 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7692 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7693 N0.getOperand(0), N1.getOperand(0));
7694 return DAG.getNode(ISD::AND, DL, VT, X,
7695 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7696 }
7697 }
7698 }
7699 }
7700
7701 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7702 if (N0.getOpcode() == ISD::AND &&
7703 N1.getOpcode() == ISD::AND &&
7704 N0.getOperand(0) == N1.getOperand(0) &&
7705 // Don't increase # computations.
7706 (N0->hasOneUse() || N1->hasOneUse())) {
7707 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7708 N0.getOperand(1), N1.getOperand(1));
7709 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7710 }
7711
7712 return SDValue();
7713}
7714
7715/// OR combines for which the commuted variant will be tried as well.
7716 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7717 SDNode *N) {
7718 EVT VT = N0.getValueType();
7719 unsigned BW = VT.getScalarSizeInBits();
7720 SDLoc DL(N);
7721
7722 auto peekThroughResize = [](SDValue V) {
7723 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7724 return V->getOperand(0);
7725 return V;
7726 };
7727
7728 SDValue N0Resized = peekThroughResize(N0);
7729 if (N0Resized.getOpcode() == ISD::AND) {
7730 SDValue N1Resized = peekThroughResize(N1);
7731 SDValue N00 = N0Resized.getOperand(0);
7732 SDValue N01 = N0Resized.getOperand(1);
7733
7734 // fold or (and x, y), x --> x
7735 if (N00 == N1Resized || N01 == N1Resized)
7736 return N1;
7737
7738 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7739 // TODO: Set AllowUndefs = true.
7740 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7741 /* AllowUndefs */ false)) {
7742 if (peekThroughResize(NotOperand) == N1Resized)
7743 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7744 N1);
7745 }
7746
7747 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7748 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7749 /* AllowUndefs */ false)) {
7750 if (peekThroughResize(NotOperand) == N1Resized)
7751 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7752 N1);
7753 }
7754 }
7755
7756 SDValue X, Y;
7757
7758 // fold or (xor X, N1), N1 --> or X, N1
7759 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7760 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7761
7762 // fold or (xor x, y), (x and/or y) --> or x, y
7763 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7764 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7765 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7766 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7767
7768 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7769 return R;
7770
7771 auto peekThroughZext = [](SDValue V) {
7772 if (V->getOpcode() == ISD::ZERO_EXTEND)
7773 return V->getOperand(0);
7774 return V;
7775 };
7776
7777 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7778 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7779 N0.getOperand(0) == N1.getOperand(0) &&
7780 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7781 return N0;
7782
7783 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7784 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7785 N0.getOperand(1) == N1.getOperand(0) &&
7786 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7787 return N0;
7788
7789 // Attempt to match a legalized build_pair-esque pattern:
7790 // or(shl(aext(Hi),BW/2),zext(Lo))
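// For an i64 result this matches or (shl (anyext Hi), 32), (zext Lo) with
// i32 Lo/Hi, i.e. the two halves of a split i64 value.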
7791 SDValue Lo, Hi;
7792 if (sd_match(N0,
7793 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7794 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7795 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7796 Lo.getValueType() == Hi.getValueType()) {
7797 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7798 SDValue NotLo, NotHi;
7799 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7800 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7801 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7802 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7803 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7804 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7805 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7806 }
7807 }
7808
7809 return SDValue();
7810}
7811
7812SDValue DAGCombiner::visitOR(SDNode *N) {
7813 SDValue N0 = N->getOperand(0);
7814 SDValue N1 = N->getOperand(1);
7815 EVT VT = N1.getValueType();
7816 SDLoc DL(N);
7817
7818 // x | x --> x
7819 if (N0 == N1)
7820 return N0;
7821
7822 // fold (or c1, c2) -> c1|c2
7823 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7824 return C;
7825
7826 // canonicalize constant to RHS
7827 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7828 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7829 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7830
7831 // fold vector ops
7832 if (VT.isVector()) {
7833 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7834 return FoldedVOp;
7835
7836 // fold (or x, 0) -> x, vector edition
7837 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7838 return N0;
7839
7840 // fold (or x, -1) -> -1, vector edition
7841 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7842 // do not return N1, because undef node may exist in N1
7843 return DAG.getAllOnesConstant(DL, N1.getValueType());
7844
7845 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7846 // Do this only if the resulting type / shuffle is legal.
7847 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7848 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7849 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7850 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7851 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7852 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7853 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7854 // Ensure both shuffles have a zero input.
7855 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7856 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7857 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7858 bool CanFold = true;
7859 int NumElts = VT.getVectorNumElements();
7860 SmallVector<int, 4> Mask(NumElts, -1);
7861
7862 for (int i = 0; i != NumElts; ++i) {
7863 int M0 = SV0->getMaskElt(i);
7864 int M1 = SV1->getMaskElt(i);
7865
7866 // Determine if either index is pointing to a zero vector.
7867 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7868 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7869
7870 // If one element is zero and the other side is undef, keep undef.
7871 // This also handles the case that both are undef.
7872 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7873 continue;
7874
7875 // Make sure only one of the elements is zero.
7876 if (M0Zero == M1Zero) {
7877 CanFold = false;
7878 break;
7879 }
7880
7881 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7882
7883 // We have a zero and non-zero element. If the non-zero came from
7884 // SV0 make the index a LHS index. If it came from SV1, make it
7885 // a RHS index. We need to mod by NumElts because we don't care
7886 // which operand it came from in the original shuffles.
7887 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7888 }
7889
7890 if (CanFold) {
7891 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7892 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7893 SDValue LegalShuffle =
7894 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7895 if (LegalShuffle)
7896 return LegalShuffle;
7897 }
7898 }
7899 }
7900 }
7901
7902 // fold (or x, 0) -> x
7903 if (isNullConstant(N1))
7904 return N0;
7905
7906 // fold (or x, -1) -> -1
7907 if (isAllOnesConstant(N1))
7908 return N1;
7909
7910 if (SDValue NewSel = foldBinOpIntoSelect(N))
7911 return NewSel;
7912
7913 // fold (or x, c) -> c iff (x & ~c) == 0
7914 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7915 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7916 return N1;
7917
7918 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7919 return R;
7920
7921 if (SDValue Combined = visitORLike(N0, N1, DL))
7922 return Combined;
7923
7924 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7925 return Combined;
7926
7927 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7928 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7929 return BSwap;
7930 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7931 return BSwap;
7932
7933 // reassociate or
7934 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7935 return ROR;
7936
7937 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7938 if (SDValue SD =
7939 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7940 return SD;
7941
7942 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7943 // iff (c1 & c2) != 0 or c1/c2 are undef.
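// For illustration with made-up constants: c1 = 0x0F, c2 = 0xFC
// (c1 & c2 == 0x0C != 0):
//   (or (and X, 0x0F), 0xFC) --> (and (or X, 0xFC), 0xFF)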
7944 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7945 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7946 };
7947 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7948 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7949 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7950 {N1, N0.getOperand(1)})) {
7951 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7952 AddToWorklist(IOR.getNode());
7953 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
7954 }
7955 }
7956
7957 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7958 return Combined;
7959 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7960 return Combined;
7961
7962 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7963 if (N0.getOpcode() == N1.getOpcode())
7964 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7965 return V;
7966
7967 // See if this is some rotate idiom.
7968 if (SDValue Rot = MatchRotate(N0, N1, DL))
7969 return Rot;
7970
7971 if (SDValue Load = MatchLoadCombine(N))
7972 return Load;
7973
7974 // Simplify the operands using demanded-bits information.
7975 if (SimplifyDemandedBits(SDValue(N, 0)))
7976 return SDValue(N, 0);
7977
7978 // If OR can be rewritten into ADD, try combines based on ADD.
7979 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7980 DAG.isADDLike(SDValue(N, 0)))
7981 if (SDValue Combined = visitADDLike(N))
7982 return Combined;
7983
7984 // Postpone until legalization completed to avoid interference with bswap
7985 // folding
7986 if (LegalOperations || VT.isVector())
7987 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7988 return R;
7989
7990 return SDValue();
7991}
7992
7993 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7994 SDValue &Mask) {
7995 if (Op.getOpcode() == ISD::AND &&
7996 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7997 Mask = Op.getOperand(1);
7998 return Op.getOperand(0);
7999 }
8000 return Op;
8001}
8002
8003/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8004static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8005 SDValue &Mask) {
8006 Op = stripConstantMask(DAG, Op, Mask);
8007 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8008 Shift = Op;
8009 return true;
8010 }
8011 return false;
8012}
8013
8014/// Helper function for visitOR to extract the needed side of a rotate idiom
8015/// from a shl/srl/mul/udiv. This is meant to handle cases where
8016/// InstCombine merged some outside op with one of the shifts from
8017/// the rotate pattern.
8018/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8019/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8020/// patterns:
8021///
8022/// (or (add v v) (shrl v bitwidth-1)):
8023/// expands (add v v) -> (shl v 1)
8024///
8025/// (or (mul v c0) (shrl (mul v c1) c2)):
8026/// expands (mul v c0) -> (shl (mul v c1) c3)
8027///
8028/// (or (udiv v c0) (shl (udiv v c1) c2)):
8029/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8030///
8031/// (or (shl v c0) (shrl (shl v c1) c2)):
8032/// expands (shl v c0) -> (shl (shl v c1) c3)
8033///
8034/// (or (shrl v c0) (shl (shrl v c1) c2)):
8035/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8036///
8037/// Such that in all cases, c3+c2==bitwidth(op v c1).
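/// A worked example with illustrative constants: for i32 and the mul/srl form,
/// with c1 = 3 and c2 = 29 the needed shift is c3 = 32 - 29 = 3, so c0 must be
/// c1 << c3 = 24:
///   (or (mul v 24) (srl (mul v 3) 29))
/// expands (mul v 24) -> (shl (mul v 3) 3), exposing a rotate of (mul v 3).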
8038 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8039 SDValue ExtractFrom, SDValue &Mask,
8040 const SDLoc &DL) {
8041 assert(OppShift && ExtractFrom && "Empty SDValue");
8042 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8043 return SDValue();
8044
8045 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8046
8047 // Value and Type of the shift.
8048 SDValue OppShiftLHS = OppShift.getOperand(0);
8049 EVT ShiftedVT = OppShiftLHS.getValueType();
8050
8051 // Amount of the existing shift.
8052 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8053
8054 // (add v v) -> (shl v 1)
8055 // TODO: Should this be a general DAG canonicalization?
8056 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8057 ExtractFrom.getOpcode() == ISD::ADD &&
8058 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8059 ExtractFrom.getOperand(0) == OppShiftLHS &&
8060 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8061 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8062 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8063
8064 // Preconditions:
8065 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8066 //
8067 // Find opcode of the needed shift to be extracted from (op0 v c0).
8068 unsigned Opcode = ISD::DELETED_NODE;
8069 bool IsMulOrDiv = false;
8070 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8071 // opcode or its arithmetic (mul or udiv) variant.
8072 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8073 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8074 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8075 return false;
8076 Opcode = NeededShift;
8077 return true;
8078 };
8079 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8080 // that the needed shift can be extracted from.
8081 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8082 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8083 return SDValue();
8084
8085 // op0 must be the same opcode on both sides, have the same LHS argument,
8086 // and produce the same value type.
8087 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8088 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8089 ShiftedVT != ExtractFrom.getValueType())
8090 return SDValue();
8091
8092 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8093 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8094 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8095 ConstantSDNode *ExtractFromCst =
8096 isConstOrConstSplat(ExtractFrom.getOperand(1));
8097 // TODO: We should be able to handle non-uniform constant vectors for these values
8098 // Check that we have constant values.
8099 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8100 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8101 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8102 return SDValue();
8103
8104 // Compute the shift amount we need to extract to complete the rotate.
8105 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8106 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8107 return SDValue();
8108 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8109 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8110 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8111 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8112 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8113
8114 // Now try extract the needed shift from the ExtractFrom op and see if the
8115 // result matches up with the existing shift's LHS op.
8116 if (IsMulOrDiv) {
8117 // Op to extract from is a mul or udiv by a constant.
8118 // Check:
8119 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8120 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8121 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8122 NeededShiftAmt.getZExtValue());
8123 APInt ResultAmt;
8124 APInt Rem;
8125 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8126 if (Rem != 0 || ResultAmt != OppLHSAmt)
8127 return SDValue();
8128 } else {
8129 // Op to extract from is a shift by a constant.
8130 // Check:
8131 // c2 - (bitwidth(op0 v c0) - c1) == c0
8132 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8133 ExtractFromAmt.getBitWidth()))
8134 return SDValue();
8135 }
8136
8137 // Return the expanded shift op that should allow a rotate to be formed.
8138 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8139 EVT ResVT = ExtractFrom.getValueType();
8140 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8141 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8142}
8143
8144// Return true if we can prove that, whenever Neg and Pos are both in the
8145// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8146// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8147//
8148// (or (shift1 X, Neg), (shift2 X, Pos))
8149//
8150// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8151// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8152// to consider shift amounts with defined behavior.
8153//
8154// The IsRotate flag should be set when the LHS of both shifts is the same.
8155// Otherwise if matching a general funnel shift, it should be clear.
8156static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8157 SelectionDAG &DAG, bool IsRotate) {
8158 const auto &TLI = DAG.getTargetLoweringInfo();
8159 // If EltSize is a power of 2 then:
8160 //
8161 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8162 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8163 //
8164 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8165 // for the stronger condition:
8166 //
8167 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8168 //
8169 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8170 // we can just replace Neg with Neg' for the rest of the function.
8171 //
8172 // In other cases we check for the even stronger condition:
8173 //
8174 // Neg == EltSize - Pos [B]
8175 //
8176 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8177 // behavior if Pos == 0 (and consequently Neg == EltSize).
8178 //
8179 // We could actually use [A] whenever EltSize is a power of 2, but the
8180 // only extra cases that it would match are those uninteresting ones
8181 // where Neg and Pos are never in range at the same time. E.g. for
8182 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8183 // as well as (sub 32, Pos), but:
8184 //
8185 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8186 //
8187 // always invokes undefined behavior for 32-bit X.
8188 //
8189 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8190 // This allows us to peek through any operations that only affect Mask's
8191 // un-demanded bits.
8192 //
8193 // NOTE: We can only do this when matching operations which won't modify the
8194 // least Log2(EltSize) significant bits and not a general funnel shift.
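// Example with EltSize == 32: Neg == (sub 32, Pos) satisfies [B] directly,
// while Neg == (and (sub 0, Pos), 31) satisfies [A] because
// (0 - Pos) & 31 == (32 - Pos) & 31 for every Pos.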
8195 unsigned MaskLoBits = 0;
8196 if (IsRotate && isPowerOf2_64(EltSize)) {
8197 unsigned Bits = Log2_64(EltSize);
8198 unsigned NegBits = Neg.getScalarValueSizeInBits();
8199 if (NegBits >= Bits) {
8200 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8201 if (SDValue Inner =
8202 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8203 Neg = Inner;
8204 MaskLoBits = Bits;
8205 }
8206 }
8207 }
8208
8209 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8210 if (Neg.getOpcode() != ISD::SUB)
8211 return false;
8212 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8213 if (!NegC)
8214 return false;
8215 SDValue NegOp1 = Neg.getOperand(1);
8216
8217 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8218 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8219 // are redundant for the purpose of the equality.
8220 if (MaskLoBits) {
8221 unsigned PosBits = Pos.getScalarValueSizeInBits();
8222 if (PosBits >= MaskLoBits) {
8223 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8224 if (SDValue Inner =
8225 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8226 Pos = Inner;
8227 }
8228 }
8229 }
8230
8231 // The condition we need is now:
8232 //
8233 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8234 //
8235 // If NegOp1 == Pos then we need:
8236 //
8237 // EltSize & Mask == NegC & Mask
8238 //
8239 // (because "x & Mask" is a truncation and distributes through subtraction).
8240 //
8241 // We also need to account for a potential truncation of NegOp1 if the amount
8242 // has already been legalized to a shift amount type.
8243 APInt Width;
8244 if ((Pos == NegOp1) ||
8245 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8246 Width = NegC->getAPIntValue();
8247
8248 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8249 // Then the condition we want to prove becomes:
8250 //
8251 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8252 //
8253 // which, again because "x & Mask" is a truncation, becomes:
8254 //
8255 // NegC & Mask == (EltSize - PosC) & Mask
8256 // EltSize & Mask == (NegC + PosC) & Mask
8257 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8258 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8259 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8260 else
8261 return false;
8262 } else
8263 return false;
8264
8265 // Now we just need to check that EltSize & Mask == Width & Mask.
8266 if (MaskLoBits)
8267 // EltSize & Mask is 0 since Mask is EltSize - 1.
8268 return Width.getLoBits(MaskLoBits) == 0;
8269 return Width == EltSize;
8270}
8271
8272// A subroutine of MatchRotate used once we have found an OR of two opposite
8273// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8274// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8275// former being preferred if supported. InnerPos and InnerNeg are Pos and
8276// Neg with outer conversions stripped away.
8277SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8278 SDValue Neg, SDValue InnerPos,
8279 SDValue InnerNeg, bool HasPos,
8280 unsigned PosOpcode, unsigned NegOpcode,
8281 const SDLoc &DL) {
8282 // fold (or (shl x, (*ext y)),
8283 // (srl x, (*ext (sub 32, y)))) ->
8284 // (rotl x, y) or (rotr x, (sub 32, y))
8285 //
8286 // fold (or (shl x, (*ext (sub 32, y))),
8287 // (srl x, (*ext y))) ->
8288 // (rotr x, y) or (rotl x, (sub 32, y))
8289 EVT VT = Shifted.getValueType();
8290 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8291 /*IsRotate*/ true)) {
8292 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8293 HasPos ? Pos : Neg);
8294 }
8295
8296 return SDValue();
8297}
8298
8299// A subroutine of MatchRotate used once we have found an OR of two opposite
8300// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8301// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8302// former being preferred if supported. InnerPos and InnerNeg are Pos and
8303// Neg with outer conversions stripped away.
8304// TODO: Merge with MatchRotatePosNeg.
8305SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8306 SDValue Neg, SDValue InnerPos,
8307 SDValue InnerNeg, bool HasPos,
8308 unsigned PosOpcode, unsigned NegOpcode,
8309 const SDLoc &DL) {
8310 EVT VT = N0.getValueType();
8311 unsigned EltBits = VT.getScalarSizeInBits();
8312
8313 // fold (or (shl x0, (*ext y)),
8314 // (srl x1, (*ext (sub 32, y)))) ->
8315 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8316 //
8317 // fold (or (shl x0, (*ext (sub 32, y))),
8318 // (srl x1, (*ext y))) ->
8319 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8320 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8321 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8322 HasPos ? Pos : Neg);
8323 }
8324
8325 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8326 // so for now just use the PosOpcode case if its legal.
8327 // TODO: When can we use the NegOpcode case?
8328 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8329 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8330 if (Op.getOpcode() != BinOpc)
8331 return false;
8332 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8333 return Cst && (Cst->getAPIntValue() == Imm);
8334 };
8335
8336 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8337 // -> (fshl x0, x1, y)
8338 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8339 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8340 InnerPos == InnerNeg.getOperand(0) &&
8341 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8342 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8343 }
8344
8345 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8346 // -> (fshr x0, x1, y)
8347 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8348 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8349 InnerNeg == InnerPos.getOperand(0) &&
8350 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8351 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8352 }
8353
8354 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8355 // -> (fshr x0, x1, y)
8356 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8357 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8358 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8359 InnerNeg == InnerPos.getOperand(0) &&
8360 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8361 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8362 }
8363 }
8364
8365 return SDValue();
8366}
8367
8368// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8369// idioms for rotate, and if the target supports rotation instructions, generate
8370// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8371// with different shifted sources.
8372SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8373 EVT VT = LHS.getValueType();
8374
8375 // The target must have at least one rotate/funnel flavor.
8376 // We still try to match rotate by constant pre-legalization.
8377 // TODO: Support pre-legalization funnel-shift by constant.
8378 bool HasROTL = hasOperation(ISD::ROTL, VT);
8379 bool HasROTR = hasOperation(ISD::ROTR, VT);
8380 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8381 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8382
8383 // If the type is going to be promoted and the target has enabled custom
8384 // lowering for rotate, allow matching rotate by non-constants. Only allow
8385 // this for scalar types.
8386 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8387 TargetLowering::TypePromoteInteger) {
8388 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8389 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8390 }
8391
8392 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8393 return SDValue();
8394
8395 // Check for truncated rotate.
8396 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8397 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8398 assert(LHS.getValueType() == RHS.getValueType());
8399 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8400 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8401 }
8402 }
8403
8404 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8405 SDValue LHSShift; // The shift.
8406 SDValue LHSMask; // AND value if any.
8407 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8408
8409 SDValue RHSShift; // The shift.
8410 SDValue RHSMask; // AND value if any.
8411 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8412
8413 // If neither side matched a rotate half, bail
8414 if (!LHSShift && !RHSShift)
8415 return SDValue();
8416
8417 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8418 // side of the rotate, so try to handle that here. In all cases we need to
8419 // pass the matched shift from the opposite side to compute the opcode and
8420 // needed shift amount to extract. We still want to do this if both sides
8421 // matched a rotate half because one half may be a potential overshift that
8422 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8423 // single one).
8424
8425 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8426 if (LHSShift)
8427 if (SDValue NewRHSShift =
8428 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8429 RHSShift = NewRHSShift;
8430 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8431 if (RHSShift)
8432 if (SDValue NewLHSShift =
8433 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8434 LHSShift = NewLHSShift;
8435
8436 // If a side is still missing, nothing else we can do.
8437 if (!RHSShift || !LHSShift)
8438 return SDValue();
8439
8440 // At this point we've matched or extracted a shift op on each side.
8441
8442 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8443 return SDValue(); // Shifts must disagree.
8444
8445 // Canonicalize shl to left side in a shl/srl pair.
8446 if (RHSShift.getOpcode() == ISD::SHL) {
8447 std::swap(LHS, RHS);
8448 std::swap(LHSShift, RHSShift);
8449 std::swap(LHSMask, RHSMask);
8450 }
8451
8452 // Something has gone wrong - we've lost the shl/srl pair - bail.
8453 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8454 return SDValue();
8455
8456 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8457 SDValue LHSShiftArg = LHSShift.getOperand(0);
8458 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8459 SDValue RHSShiftArg = RHSShift.getOperand(0);
8460 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8461
8462 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8463 ConstantSDNode *RHS) {
8464 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8465 };
8466
8467 auto ApplyMasks = [&](SDValue Res) {
8468 // If there is an AND of either shifted operand, apply it to the result.
8469 if (LHSMask.getNode() || RHSMask.getNode()) {
8470 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8471 SDValue Mask = AllOnes;
8472
8473 if (LHSMask.getNode()) {
8474 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8475 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8476 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8477 }
8478 if (RHSMask.getNode()) {
8479 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8480 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8481 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8482 }
8483
8484 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8485 }
8486
8487 return Res;
8488 };
8489
8490 // TODO: Support pre-legalization funnel-shift by constant.
8491 bool IsRotate = LHSShiftArg == RHSShiftArg;
8492 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8493 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8494 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8495 // Look for a disguised rotate by constant.
8496 // The common shifted operand X may be hidden inside another 'or'.
8497 SDValue X, Y;
8498 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8499 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8500 return false;
8501 if (CommonOp == Or.getOperand(0)) {
8502 X = CommonOp;
8503 Y = Or.getOperand(1);
8504 return true;
8505 }
8506 if (CommonOp == Or.getOperand(1)) {
8507 X = CommonOp;
8508 Y = Or.getOperand(0);
8509 return true;
8510 }
8511 return false;
8512 };
8513
8514 SDValue Res;
8515 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8516 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8517 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8518 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8519 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8520 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8521 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8522 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8523 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8524 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8525 } else {
8526 return SDValue();
8527 }
8528
8529 return ApplyMasks(Res);
8530 }
8531
8532 return SDValue(); // Requires funnel shift support.
8533 }
8534
8535 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8536 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8537 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8538 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8539 // iff C1+C2 == EltSizeInBits
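// e.g. for i32: (or (shl x, 8), (srl x, 24)) --> (rotl x, 8) or (rotr x, 24),
// and with distinct sources (or (shl x, 8), (srl y, 24)) --> (fshl x, y, 8).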
8540 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8541 SDValue Res;
8542 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8543 bool UseROTL = !LegalOperations || HasROTL;
8544 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8545 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8546 } else {
8547 bool UseFSHL = !LegalOperations || HasFSHL;
8548 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8549 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8550 }
8551
8552 return ApplyMasks(Res);
8553 }
8554
8555 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8556 // shift.
8557 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8558 return SDValue();
8559
8560 // If there is a mask here, and we have a variable shift, we can't be sure
8561 // that we're masking out the right stuff.
8562 if (LHSMask.getNode() || RHSMask.getNode())
8563 return SDValue();
8564
8565 // If the shift amount is sign/zext/any-extended just peel it off.
8566 SDValue LExtOp0 = LHSShiftAmt;
8567 SDValue RExtOp0 = RHSShiftAmt;
8568 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8569 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8570 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8571 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8572 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8573 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8574 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8575 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8576 LExtOp0 = LHSShiftAmt.getOperand(0);
8577 RExtOp0 = RHSShiftAmt.getOperand(0);
8578 }
8579
8580 if (IsRotate && (HasROTL || HasROTR)) {
8581 SDValue TryL =
8582 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8583 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8584 if (TryL)
8585 return TryL;
8586
8587 SDValue TryR =
8588 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8589 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8590 if (TryR)
8591 return TryR;
8592 }
8593
8594 SDValue TryL =
8595 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8596 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8597 if (TryL)
8598 return TryL;
8599
8600 SDValue TryR =
8601 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8602 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8603 if (TryR)
8604 return TryR;
8605
8606 return SDValue();
8607}
8608
8609/// Recursively traverses the expression calculating the origin of the requested
8610/// byte of the given value. Returns std::nullopt if the provider can't be
8611/// calculated.
8612///
8613/// For all the values except the root of the expression, we verify that the
8614/// value has exactly one use and if not then return std::nullopt. This way if
8615/// the origin of the byte is returned it's guaranteed that the values which
8616/// contribute to the byte are not used outside of this expression.
8617
8618/// However, there is a special case when dealing with vector loads -- we allow
8619/// more than one use if the load is a vector type. Since the values that
8620/// contribute to the byte ultimately come from the ExtractVectorElements of the
8621/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8622/// because those operations are independent from the pattern to be combined.
8623/// For vector loads, we simply care that the ByteProviders are adjacent
8624/// positions of the same vector, and their index matches the byte that is being
8625/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8626/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8627/// byte position we are trying to provide for the LoadCombine. If these do
8628/// not match, then we can not combine the vector loads. \p Index uses the
8629/// byte position we are trying to provide for and is matched against the
8630/// shl and load size. The \p Index algorithm ensures the requested byte is
8631/// provided for by the pattern, and the pattern does not over provide bytes.
8632///
8633///
8634/// The supported LoadCombine pattern for vector loads is as follows
8635/// or
8636/// / \
8637/// or shl
8638/// / \ |
8639/// or shl zext
8640/// / \ | |
8641/// shl zext zext EVE*
8642/// | | | |
8643/// zext EVE* EVE* LOAD
8644/// | | |
8645/// EVE* LOAD LOAD
8646/// |
8647/// LOAD
8648///
8649/// *ExtractVectorElement
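/// As a small illustration with hypothetical values %a and %b: asking for
/// byte 2 of (or (zext i8 %a to i32), (shl (zext i8 %b to i32), 16)) steps
/// through the shl (ByteShift == 2), then asks for byte 0 of the zext, and,
/// if %b is produced by a load, reports byte 0 of that load as the provider.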
8651
8652static std::optional<SDByteProvider>
8653 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8654 std::optional<uint64_t> VectorIndex,
8655 unsigned StartingIndex = 0) {
8656
8657 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8658 if (Depth == 10)
8659 return std::nullopt;
8660
8661 // Only allow multiple uses if the instruction is a vector load (in which
8662 // case we will use the load for every ExtractVectorElement)
8663 if (Depth && !Op.hasOneUse() &&
8664 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8665 return std::nullopt;
8666
8667 // Fail to combine if we have encountered anything but a LOAD after handling
8668 // an ExtractVectorElement.
8669 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8670 return std::nullopt;
8671
8672 unsigned BitWidth = Op.getValueSizeInBits();
8673 if (BitWidth % 8 != 0)
8674 return std::nullopt;
8675 unsigned ByteWidth = BitWidth / 8;
8676 assert(Index < ByteWidth && "invalid index requested");
8677 (void) ByteWidth;
8678
8679 switch (Op.getOpcode()) {
8680 case ISD::OR: {
8681 auto LHS =
8682 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8683 if (!LHS)
8684 return std::nullopt;
8685 auto RHS =
8686 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8687 if (!RHS)
8688 return std::nullopt;
8689
8690 if (LHS->isConstantZero())
8691 return RHS;
8692 if (RHS->isConstantZero())
8693 return LHS;
8694 return std::nullopt;
8695 }
8696 case ISD::SHL: {
8697 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8698 if (!ShiftOp)
8699 return std::nullopt;
8700
8701 uint64_t BitShift = ShiftOp->getZExtValue();
8702
8703 if (BitShift % 8 != 0)
8704 return std::nullopt;
8705 uint64_t ByteShift = BitShift / 8;
8706
8707 // If we are shifting by an amount greater than the index we are trying to
8708 // provide, then do not provide anything. Otherwise, subtract the index by
8709 // the amount we shifted by.
8710 return Index < ByteShift
8711 ? SDByteProvider::getConstantZero()
8712 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8713 Depth + 1, VectorIndex, Index);
8714 }
8715 case ISD::ANY_EXTEND:
8716 case ISD::SIGN_EXTEND:
8717 case ISD::ZERO_EXTEND: {
8718 SDValue NarrowOp = Op->getOperand(0);
8719 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8720 if (NarrowBitWidth % 8 != 0)
8721 return std::nullopt;
8722 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8723
8724 if (Index >= NarrowByteWidth)
8725 return Op.getOpcode() == ISD::ZERO_EXTEND
8726 ? std::optional<SDByteProvider>(
8727 SDByteProvider::getConstantZero())
8728 : std::nullopt;
8729 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8730 StartingIndex);
8731 }
8732 case ISD::BSWAP:
8733 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8734 Depth + 1, VectorIndex, StartingIndex);
8735 case ISD::EXTRACT_VECTOR_ELT: {
8736 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8737 if (!OffsetOp)
8738 return std::nullopt;
8739
8740 VectorIndex = OffsetOp->getZExtValue();
8741
8742 SDValue NarrowOp = Op->getOperand(0);
8743 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8744 if (NarrowBitWidth % 8 != 0)
8745 return std::nullopt;
8746 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8747 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8748 // type, leaving the high bits undefined.
8749 if (Index >= NarrowByteWidth)
8750 return std::nullopt;
8751
8752 // Check to see if the position of the element in the vector corresponds
8753 // with the byte we are trying to provide for. In the case of a vector of
8754 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8755 // the element will provide a range of bytes. For example, if we have a
8756 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8757 // 3).
8758 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8759 return std::nullopt;
8760 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8761 return std::nullopt;
8762
8763 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8764 VectorIndex, StartingIndex);
8765 }
8766 case ISD::LOAD: {
8767 auto L = cast<LoadSDNode>(Op.getNode());
8768 if (!L->isSimple() || L->isIndexed())
8769 return std::nullopt;
8770
8771 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8772 if (NarrowBitWidth % 8 != 0)
8773 return std::nullopt;
8774 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8775
8776 // If the width of the load does not reach the byte we are trying to provide
8777 // for and it is not a ZEXTLOAD, then the load does not provide for the byte
8778 // in question.
8779 if (Index >= NarrowByteWidth)
8780 return L->getExtensionType() == ISD::ZEXTLOAD
8781 ? std::optional<SDByteProvider>(
8782 SDByteProvider::getConstantZero())
8783 : std::nullopt;
8784
8785 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8786 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8787 }
8788 }
8789
8790 return std::nullopt;
8791}
8792
8793static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8794 return i;
8795}
8796
8797static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8798 return BW - i - 1;
8799}
8800
8801 // Check if the byte offsets we are looking at match either a big- or a
8802 // little-endian value load. Return true for big endian, false for little
8803 // endian, and std::nullopt if the match failed.
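// For example, byte offsets {0, 1, 2, 3} relative to FirstOffset correspond to
// a little-endian load (returns false), while {3, 2, 1, 0} correspond to a
// big-endian load (returns true).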
8804static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8805 int64_t FirstOffset) {
8806 // The endian can be decided only when it is 2 bytes at least.
8807 unsigned Width = ByteOffsets.size();
8808 if (Width < 2)
8809 return std::nullopt;
8810
8811 bool BigEndian = true, LittleEndian = true;
8812 for (unsigned i = 0; i < Width; i++) {
8813 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8814 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8815 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8816 if (!BigEndian && !LittleEndian)
8817 return std::nullopt;
8818 }
8819
8820 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8821 "little endian");
8822 return BigEndian;
8823}
8824
8825// Look through one layer of truncate or extend.
8826 static SDValue stripTruncAndExt(SDValue Value) {
8827 switch (Value.getOpcode()) {
8828 case ISD::TRUNCATE:
8829 case ISD::ZERO_EXTEND:
8830 case ISD::SIGN_EXTEND:
8831 case ISD::ANY_EXTEND:
8832 return Value.getOperand(0);
8833 }
8834 return SDValue();
8835}
8836
8837/// Match a pattern where a wide type scalar value is stored by several narrow
8838 /// stores. Fold it into a single store or a BSWAP and a store if the target
8839/// supports it.
8840///
8841/// Assuming little endian target:
8842/// i8 *p = ...
8843/// i32 val = ...
8844/// p[0] = (val >> 0) & 0xFF;
8845/// p[1] = (val >> 8) & 0xFF;
8846/// p[2] = (val >> 16) & 0xFF;
8847/// p[3] = (val >> 24) & 0xFF;
8848/// =>
8849/// *((i32)p) = val;
8850///
8851/// i8 *p = ...
8852/// i32 val = ...
8853/// p[0] = (val >> 24) & 0xFF;
8854/// p[1] = (val >> 16) & 0xFF;
8855/// p[2] = (val >> 8) & 0xFF;
8856/// p[3] = (val >> 0) & 0xFF;
8857/// =>
8858/// *((i32)p) = BSWAP(val);
8859SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8860 // The matching looks for "store (trunc x)" patterns that appear early but are
8861 // likely to be replaced by truncating store nodes during combining.
8862 // TODO: If there is evidence that running this later would help, this
8863 // limitation could be removed. Legality checks may need to be added
8864 // for the created store and optional bswap/rotate.
8865 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8866 return SDValue();
8867
8868 // We only handle merging simple stores of 1-4 bytes.
8869 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8870 EVT MemVT = N->getMemoryVT();
8871 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8872 !N->isSimple() || N->isIndexed())
8873 return SDValue();
8874
8875 // Collect all of the stores in the chain, up to the maximum store width (i64).
8876 SDValue Chain = N->getChain();
8877 SmallVector<StoreSDNode *, 8> Stores;
8878 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8879 unsigned MaxWideNumBits = 64;
8880 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8881 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8882 // All stores must be the same size to ensure that we are writing all of the
8883 // bytes in the wide value.
8884 // This store should have exactly one use as a chain operand for another
8885 // store in the merging set. If there are other chain uses, then the
8886 // transform may not be safe because order of loads/stores outside of this
8887 // set may not be preserved.
8888 // TODO: We could allow multiple sizes by tracking each stored byte.
8889 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8890 Store->isIndexed() || !Store->hasOneUse())
8891 return SDValue();
8892 Stores.push_back(Store);
8893 Chain = Store->getChain();
8894 if (MaxStores < Stores.size())
8895 return SDValue();
8896 }
8897 // There is no reason to continue if we do not have at least a pair of stores.
8898 if (Stores.size() < 2)
8899 return SDValue();
8900
8901 // Handle simple types only.
8902 LLVMContext &Context = *DAG.getContext();
8903 unsigned NumStores = Stores.size();
8904 unsigned WideNumBits = NumStores * NarrowNumBits;
8905 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8906 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8907 return SDValue();
8908
8909 // Check if all bytes of the source value that we are looking at are stored
8910 // to the same base address. Collect offsets from Base address into OffsetMap.
8911 SDValue SourceValue;
8912 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8913 int64_t FirstOffset = INT64_MAX;
8914 StoreSDNode *FirstStore = nullptr;
8915 std::optional<BaseIndexOffset> Base;
8916 for (auto *Store : Stores) {
8917 // All the stores store different parts of the CombinedValue. A truncate is
8918 // required to get the partial value.
8919 SDValue Trunc = Store->getValue();
8920 if (Trunc.getOpcode() != ISD::TRUNCATE)
8921 return SDValue();
8922 // Other than the first/last part, a shift operation is required to get the
8923 // offset.
8924 int64_t Offset = 0;
8925 SDValue WideVal = Trunc.getOperand(0);
8926 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8927 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8928 // The shift amount must be a constant multiple of the narrow type.
8929 // It is translated to the offset address in the wide source value "y".
8930 //
8931 // x = srl y, ShiftAmtC
8932 // i8 z = trunc x
8933 // store z, ...
8934 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8935 if (ShiftAmtC % NarrowNumBits != 0)
8936 return SDValue();
8937
8938 // Make sure we aren't reading bits that are shifted in.
8939 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
8940 return SDValue();
8941
8942 Offset = ShiftAmtC / NarrowNumBits;
8943 WideVal = WideVal.getOperand(0);
8944 }
8945
8946 // Stores must share the same source value with different offsets.
8947 if (!SourceValue)
8948 SourceValue = WideVal;
8949 else if (SourceValue != WideVal) {
8950 // Truncate and extends can be stripped to see if the values are related.
8951 if (stripTruncAndExt(SourceValue) != WideVal &&
8952 stripTruncAndExt(WideVal) != SourceValue)
8953 return SDValue();
8954
8955 if (WideVal.getScalarValueSizeInBits() >
8956 SourceValue.getScalarValueSizeInBits())
8957 SourceValue = WideVal;
8958
8959 // Give up if the source value type is smaller than the store size.
8960 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8961 return SDValue();
8962 }
8963
8964 // Stores must share the same base address.
8965 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8966 int64_t ByteOffsetFromBase = 0;
8967 if (!Base)
8968 Base = Ptr;
8969 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8970 return SDValue();
8971
8972 // Remember the first store.
8973 if (ByteOffsetFromBase < FirstOffset) {
8974 FirstStore = Store;
8975 FirstOffset = ByteOffsetFromBase;
8976 }
8977 // Map the offset in the store and the offset in the combined value, and
8978 // early return if it has been set before.
8979 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8980 return SDValue();
8981 OffsetMap[Offset] = ByteOffsetFromBase;
8982 }
8983
8984 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8985 assert(FirstStore && "First store must be set");
8986
8987 // Check that a store of the wide type is both allowed and fast on the target
8988 const DataLayout &Layout = DAG.getDataLayout();
8989 unsigned Fast = 0;
8990 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8991 *FirstStore->getMemOperand(), &Fast);
8992 if (!Allowed || !Fast)
8993 return SDValue();
8994
8995 // Check if the pieces of the value are going to the expected places in memory
8996 // to merge the stores.
8997 auto checkOffsets = [&](bool MatchLittleEndian) {
8998 if (MatchLittleEndian) {
8999 for (unsigned i = 0; i != NumStores; ++i)
9000 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9001 return false;
9002 } else { // MatchBigEndian by reversing loop counter.
9003 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9004 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9005 return false;
9006 }
9007 return true;
9008 };
9009
9010 // Check if the offsets line up for the native data layout of this target.
9011 bool NeedBswap = false;
9012 bool NeedRotate = false;
9013 if (!checkOffsets(Layout.isLittleEndian())) {
9014 // Special-case: check if byte offsets line up for the opposite endian.
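// For byte-sized stores this amounts to a bswap of the source value; for
// exactly two wider stores (e.g. the two i16 halves of an i32 written in
// swapped order) rotating the source by half its width gives the same bytes.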
9015 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9016 NeedBswap = true;
9017 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9018 NeedRotate = true;
9019 else
9020 return SDValue();
9021 }
9022
9023 SDLoc DL(N);
9024 if (WideVT != SourceValue.getValueType()) {
9025 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9026 "Unexpected store value to merge");
9027 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9028 }
9029
9030 // Before legalize we can introduce illegal bswaps/rotates which will be later
9031 // converted to an explicit bswap sequence. This way we end up with a single
9032 // store and byte shuffling instead of several stores and byte shuffling.
9033 if (NeedBswap) {
9034 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9035 } else if (NeedRotate) {
9036 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9037 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9038 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9039 }
9040
9041 SDValue NewStore =
9042 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9043 FirstStore->getPointerInfo(), FirstStore->getAlign());
9044
9045 // Rely on other DAG combine rules to remove the other individual stores.
9046 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9047 return NewStore;
9048}
9049
9050/// Match a pattern where a wide type scalar value is loaded by several narrow
9051/// loads and combined by shifts and ors. Fold it into a single load or a load
9052 /// and a BSWAP if the target supports it.
9053///
9054/// Assuming little endian target:
9055/// i8 *a = ...
9056/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9057/// =>
9058/// i32 val = *((i32)a)
9059///
9060/// i8 *a = ...
9061/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9062/// =>
9063/// i32 val = BSWAP(*((i32)a))
9064///
9065/// TODO: This rule matches complex patterns with OR node roots and doesn't
9066/// interact well with the worklist mechanism. When a part of the pattern is
9067/// updated (e.g. one of the loads) its direct users are put into the worklist,
9068/// but the root node of the pattern which triggers the load combine is not
9069/// necessarily a direct user of the changed node. For example, once the address
9070 /// of the t28 load is reassociated, load combine won't be triggered:
9071/// t25: i32 = add t4, Constant:i32<2>
9072/// t26: i64 = sign_extend t25
9073/// t27: i64 = add t2, t26
9074/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9075/// t29: i32 = zero_extend t28
9076/// t32: i32 = shl t29, Constant:i8<8>
9077/// t33: i32 = or t23, t32
9078/// As a possible fix visitLoad can check if the load can be a part of a load
9079/// combine pattern and add corresponding OR roots to the worklist.
9080SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9081 assert(N->getOpcode() == ISD::OR &&
9082 "Can only match load combining against OR nodes");
9083
9084 // Handles simple types only
9085 EVT VT = N->getValueType(0);
9086 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9087 return SDValue();
9088 unsigned ByteWidth = VT.getSizeInBits() / 8;
9089
9090 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9091 auto MemoryByteOffset = [&](SDByteProvider P) {
9092 assert(P.hasSrc() && "Must be a memory byte provider");
9093 auto *Load = cast<LoadSDNode>(P.Src.value());
9094
9095 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9096
9097 assert(LoadBitWidth % 8 == 0 &&
9098 "can only analyze providers for individual bytes not bit");
9099 unsigned LoadByteWidth = LoadBitWidth / 8;
9100 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9101 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9102 };
9103
9104 std::optional<BaseIndexOffset> Base;
9105 SDValue Chain;
9106
9107 SmallPtrSet<LoadSDNode *, 8> Loads;
9108 std::optional<SDByteProvider> FirstByteProvider;
9109 int64_t FirstOffset = INT64_MAX;
9110
9111 // Check if all the bytes of the OR we are looking at are loaded from the same
9112 // base address. Collect byte offsets from the Base address in ByteOffsets.
9113 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9114 unsigned ZeroExtendedBytes = 0;
9115 for (int i = ByteWidth - 1; i >= 0; --i) {
9116 auto P =
9117 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9118 /*StartingIndex*/ i);
9119 if (!P)
9120 return SDValue();
9121
9122 if (P->isConstantZero()) {
9123 // It's OK for the N most significant bytes to be 0, we can just
9124 // zero-extend the load.
9125 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9126 return SDValue();
9127 continue;
9128 }
9129 assert(P->hasSrc() && "provenance should either be memory or zero");
9130 auto *L = cast<LoadSDNode>(P->Src.value());
9131
9132 // All loads must share the same chain
9133 SDValue LChain = L->getChain();
9134 if (!Chain)
9135 Chain = LChain;
9136 else if (Chain != LChain)
9137 return SDValue();
9138
9139 // Loads must share the same base address
9140 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9141 int64_t ByteOffsetFromBase = 0;
9142
9143 // For vector loads, the expected load combine pattern will have an
9144 // ExtractElement for each index in the vector. While each of these
9145 // ExtractElements will be accessing the same base address as determined
9146 // by the load instruction, the actual bytes they interact with will differ
9147 // due to different ExtractElement indices. To accurately determine the
9148 // byte position of an ExtractElement, we offset the base load ptr with
9149 // the index multiplied by the byte size of each element in the vector.
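// For example, for a load of <4 x i16>, the element at VectorIndex 2 begins
// 2 * 2 == 4 bytes past the load's base pointer.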
9150 if (L->getMemoryVT().isVector()) {
9151 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9152 if (LoadWidthInBit % 8 != 0)
9153 return SDValue();
9154 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9155 Ptr.addToOffset(ByteOffsetFromVector);
9156 }
9157
9158 if (!Base)
9159 Base = Ptr;
9160
9161 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9162 return SDValue();
9163
9164 // Calculate the offset of the current byte from the base address
9165 ByteOffsetFromBase += MemoryByteOffset(*P);
9166 ByteOffsets[i] = ByteOffsetFromBase;
9167
9168 // Remember the first byte load
9169 if (ByteOffsetFromBase < FirstOffset) {
9170 FirstByteProvider = P;
9171 FirstOffset = ByteOffsetFromBase;
9172 }
9173
9174 Loads.insert(L);
9175 }
9176
9177 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9178 "memory, so there must be at least one load which produces the value");
9179 assert(Base && "Base address of the accessed memory location must be set");
9180 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9181
9182 bool NeedsZext = ZeroExtendedBytes > 0;
9183
9184 EVT MemVT =
9185 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9186
9187 if (!MemVT.isSimple())
9188 return SDValue();
9189
9190 // Before legalize we can introduce too wide illegal loads which will be later
9191 // split into legal sized loads. This enables us to combine i64 load by i8
9192 // patterns to a couple of i32 loads on 32 bit targets.
9193 if (LegalOperations &&
9194 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9195 MemVT))
9196 return SDValue();
9197
9198 // Check if the bytes of the OR we are looking at match either a big- or a
9199 // little-endian value load.
9200 std::optional<bool> IsBigEndian = isBigEndian(
9201 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9202 if (!IsBigEndian)
9203 return SDValue();
9204
9205 assert(FirstByteProvider && "must be set");
9206
9207 // Ensure that the first byte is loaded from offset zero of the first load,
9208 // so the combined value can be loaded from the first load's address.
9209 if (MemoryByteOffset(*FirstByteProvider) != 0)
9210 return SDValue();
9211 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9212
9213 // The node we are looking at matches with the pattern, check if we can
9214 // replace it with a single (possibly zero-extended) load and bswap + shift if
9215 // needed.
9216
9217 // If the load needs a byte swap, check whether the target supports it.
9218 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9219
9220 // Before legalize we can introduce illegal bswaps which will be later
9221 // converted to an explicit bswap sequence. This way we end up with a single
9222 // load and byte shuffling instead of several loads and byte shuffling.
9223 // We do not introduce illegal bswaps when zero-extending as this tends to
9224 // introduce too many arithmetic instructions.
9225 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9226 !TLI.isOperationLegal(ISD::BSWAP, VT))
9227 return SDValue();
9228
9229 // If we need to bswap and zero extend, we have to insert a shift. Check that
9230 // it is legal.
9231 if (NeedsBswap && NeedsZext && LegalOperations &&
9232 !TLI.isOperationLegal(ISD::SHL, VT))
9233 return SDValue();
9234
9235 // Check that a load of the wide type is both allowed and fast on the target
9236 unsigned Fast = 0;
9237 bool Allowed =
9238 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9239 *FirstLoad->getMemOperand(), &Fast);
9240 if (!Allowed || !Fast)
9241 return SDValue();
9242
9243 SDValue NewLoad =
9244 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9245 Chain, FirstLoad->getBasePtr(),
9246 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9247
9248 // Transfer chain users from old loads to the new load.
9249 for (LoadSDNode *L : Loads)
9250 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9251
9252 if (!NeedsBswap)
9253 return NewLoad;
9254
9255 SDValue ShiftedLoad =
9256 NeedsZext ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9257 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8,
9258 VT, SDLoc(N)))
9259 : NewLoad;
9260 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9261}
9262
9263// If the target has andn, bsl, or a similar bit-select instruction,
9264// we want to unfold masked merge, with canonical pattern of:
9265// | A | |B|
9266// ((x ^ y) & m) ^ y
9267// | D |
9268// Into:
9269// (x & m) | (y & ~m)
9270// If y is a constant, m is not a 'not', and the 'andn' does not work with
9271// immediates, we unfold into a different pattern:
9272// ~(~x & m) & (m | y)
9273// If x is a constant, m is a 'not', and the 'andn' does not work with
9274// immediates, we unfold into a different pattern:
9275// (x | ~m) & ~(~m & ~y)
9276// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9277// the very least that breaks andnpd / andnps patterns, and because those
9278// patterns are simplified in IR and shouldn't be created in the DAG
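// A quick check with example bit patterns (x = 0b1100, y = 0b1010, m = 0b0110):
//   ((x ^ y) & m) ^ y == ((0b0110 & 0b0110) ^ 0b1010) == 0b1100
//   (x & m) | (y & ~m) == 0b0100 | 0b1000 == 0b1100
// i.e. bits of x are selected where m is set and bits of y elsewhere.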
9279SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9280 assert(N->getOpcode() == ISD::XOR);
9281
9282 // Don't touch 'not' (i.e. where y = -1).
9283 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9284 return SDValue();
9285
9286 EVT VT = N->getValueType(0);
9287
9288 // There are 3 commutable operators in the pattern,
9289 // so we have to deal with 8 possible variants of the basic pattern.
9290 SDValue X, Y, M;
9291 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9292 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9293 return false;
9294 SDValue Xor = And.getOperand(XorIdx);
9295 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9296 return false;
9297 SDValue Xor0 = Xor.getOperand(0);
9298 SDValue Xor1 = Xor.getOperand(1);
9299 // Don't touch 'not' (i.e. where y = -1).
9300 if (isAllOnesOrAllOnesSplat(Xor1))
9301 return false;
9302 if (Other == Xor0)
9303 std::swap(Xor0, Xor1);
9304 if (Other != Xor1)
9305 return false;
9306 X = Xor0;
9307 Y = Xor1;
9308 M = And.getOperand(XorIdx ? 0 : 1);
9309 return true;
9310 };
9311
9312 SDValue N0 = N->getOperand(0);
9313 SDValue N1 = N->getOperand(1);
9314 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9315 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9316 return SDValue();
9317
9318 // Don't do anything if the mask is constant. This should not be reachable.
9319 // InstCombine should have already unfolded this pattern, and DAGCombiner
9320 // probably shouldn't produce it either.
9321 if (isa<ConstantSDNode>(M.getNode()))
9322 return SDValue();
9323
9324 // We can transform if the target has AndNot
9325 if (!TLI.hasAndNot(M))
9326 return SDValue();
9327
9328 SDLoc DL(N);
9329
9330 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9331 // a bitwise not that would already allow ANDN to be used.
9332 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9333 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9334 // If not, we need to do a bit more work to make sure andn is still used.
9335 SDValue NotX = DAG.getNOT(DL, X, VT);
9336 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9337 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9338 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9339 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9340 }
9341
9342 // If X is a constant and M is a bitwise not, check that 'andn' works with
9343 // immediates.
9344 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9345 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9346 // If not, we need to do a bit more work to make sure andn is still used.
9347 SDValue NotM = M.getOperand(0);
9348 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9349 SDValue NotY = DAG.getNOT(DL, Y, VT);
9350 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9351 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9352 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9353 }
9354
9355 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9356 SDValue NotM = DAG.getNOT(DL, M, VT);
9357 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9358
9359 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9360}
9361
9362SDValue DAGCombiner::visitXOR(SDNode *N) {
9363 SDValue N0 = N->getOperand(0);
9364 SDValue N1 = N->getOperand(1);
9365 EVT VT = N0.getValueType();
9366 SDLoc DL(N);
9367
9368 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9369 if (N0.isUndef() && N1.isUndef())
9370 return DAG.getConstant(0, DL, VT);
9371
9372 // fold (xor x, undef) -> undef
9373 if (N0.isUndef())
9374 return N0;
9375 if (N1.isUndef())
9376 return N1;
9377
9378 // fold (xor c1, c2) -> c1^c2
9379 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9380 return C;
9381
9382 // canonicalize constant to RHS
9383 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9384 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9385 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9386
9387 // fold vector ops
9388 if (VT.isVector()) {
9389 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9390 return FoldedVOp;
9391
9392 // fold (xor x, 0) -> x, vector edition
9393 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9394 return N0;
9395 }
9396
9397 // fold (xor x, 0) -> x
9398 if (isNullConstant(N1))
9399 return N0;
9400
9401 if (SDValue NewSel = foldBinOpIntoSelect(N))
9402 return NewSel;
9403
9404 // reassociate xor
9405 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9406 return RXOR;
9407
9408 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9409 if (SDValue SD =
9410 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9411 return SD;
9412
9413 // fold (a^b) -> (a|b) iff a and b share no bits.
9414 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9415 DAG.haveNoCommonBitsSet(N0, N1)) {
9416 SDNodeFlags Flags;
9417 Flags.setDisjoint(true);
9418 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9419 }
9420
9421 // look for 'add-like' folds:
9422 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
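// e.g. for i8, (xor x, 0x80) and (add x, 0x80) both just flip the sign bit,
// since adding 0x80 cannot produce a carry into any other bit position.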
9423 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9424 isMinSignedConstant(N1))
9425 if (SDValue Combined = visitADDLike(N))
9426 return Combined;
9427
9428 // fold !(x cc y) -> (x !cc y)
9429 unsigned N0Opcode = N0.getOpcode();
9430 SDValue LHS, RHS, CC;
9431 if (TLI.isConstTrueVal(N1) &&
9432 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9433 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9434 LHS.getValueType());
9435 if (!LegalOperations ||
9436 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9437 switch (N0Opcode) {
9438 default:
9439 llvm_unreachable("Unhandled SetCC Equivalent!");
9440 case ISD::SETCC:
9441 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9442 case ISD::SELECT_CC:
9443 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9444 N0.getOperand(3), NotCC);
9445 case ISD::STRICT_FSETCC:
9446 case ISD::STRICT_FSETCCS: {
9447 if (N0.hasOneUse()) {
9448 // FIXME Can we handle multiple uses? Could we token factor the chain
9449 // results from the new/old setcc?
9450 SDValue SetCC =
9451 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9452 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9453 CombineTo(N, SetCC);
9454 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9455 recursivelyDeleteUnusedNodes(N0.getNode());
9456 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9457 }
9458 break;
9459 }
9460 }
9461 }
9462 }
9463
9464 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9465 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9466 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9467 SDValue V = N0.getOperand(0);
9468 SDLoc DL0(N0);
9469 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9470 DAG.getConstant(1, DL0, V.getValueType()));
9471 AddToWorklist(V.getNode());
9472 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9473 }
9474
9475 // fold (not (or/and x, y)) -> (and/or (not x), (not y)) iff x or y is a setcc
9476 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9477 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9478 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9479 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9480 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9481 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9482 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9483 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9484 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9485 }
9486 }
9487 // fold (not (or/and x, y)) -> (and/or (not x), (not y)) iff x or y is a constant
9488 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9489 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9490 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9491 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9492 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9493 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9494 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9495 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9496 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9497 }
9498 }
9499
9500 // fold (not (neg x)) -> (add X, -1)
9501 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9502 // Y is a constant or the subtract has a single use.
9503 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9504 isNullConstant(N0.getOperand(0))) {
9505 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9506 DAG.getAllOnesConstant(DL, VT));
9507 }
9508
9509 // fold (not (add X, -1)) -> (neg X)
9510 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9511 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9512 return DAG.getNegative(N0.getOperand(0), DL, VT);
9513 }
9514
9515 // fold (xor (and x, y), y) -> (and (not x), y)
9516 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9517 SDValue X = N0.getOperand(0);
9518 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9519 AddToWorklist(NotX.getNode());
9520 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9521 }
9522
9523 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
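// Here Y is the sign mask of X: all zeros when X >= 0 and all ones when X < 0,
// so (X + Y) ^ Y leaves X unchanged when X >= 0 and computes ~(X - 1) == -X
// when X < 0, i.e. the absolute value of X.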
9524 if (!LegalOperations || hasOperation(ISD::ABS, VT)) {
9525 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9526 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9527 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9528 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9529 SDValue S0 = S.getOperand(0);
9530 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9531 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9532 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9533 return DAG.getNode(ISD::ABS, DL, VT, S0);
9534 }
9535 }
9536
9537 // fold (xor x, x) -> 0
9538 if (N0 == N1)
9539 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9540
9541 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9542 // Here is a concrete example of this equivalence:
9543 // i16 x == 14
9544 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9545 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9546 //
9547 // =>
9548 //
9549 // i16 ~1 == 0b1111111111111110
9550 // i16 rol(~1, 14) == 0b1011111111111111
9551 //
9552 // Some additional tips to help conceptualize this transform:
9553 // - Try to see the operation as placing a single zero in a value of all ones.
9554 // - There exists no value for x which would allow the result to contain zero.
9555 // - Values of x larger than the bitwidth are undefined and do not require a
9556 // consistent result.
9557 // - Pushing the zero left requires shifting one-bits in from the right.
9558 // A rotate left of ~1 is a nice way of achieving the desired result.
9559 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9560 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9561 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9562 N0.getOperand(1));
9563 }
9564
9565 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9566 if (N0Opcode == N1.getOpcode())
9567 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9568 return V;
9569
9570 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9571 return R;
9572 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9573 return R;
9574 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9575 return R;
9576
9577 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9578 if (SDValue MM = unfoldMaskedMerge(N))
9579 return MM;
9580
9581 // Simplify the expression using non-local knowledge.
9582 if (SimplifyDemandedBits(SDValue(N, 0)))
9583 return SDValue(N, 0);
9584
9585 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9586 return Combined;
9587
9588 return SDValue();
9589}
9590
9591/// If we have a shift-by-constant of a bitwise logic op that itself has a
9592/// shift-by-constant operand with identical opcode, we may be able to convert
9593/// that into 2 independent shifts followed by the logic op. This is a
9594/// throughput improvement.
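/// For example: srl (or (srl X, 3), Y), 2 --> or (srl X, 5), (srl Y, 2);
/// the two new shifts are independent of each other and can execute in parallel.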
9595 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9596 // Match a one-use bitwise logic op.
9597 SDValue LogicOp = Shift->getOperand(0);
9598 if (!LogicOp.hasOneUse())
9599 return SDValue();
9600
9601 unsigned LogicOpcode = LogicOp.getOpcode();
9602 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9603 LogicOpcode != ISD::XOR)
9604 return SDValue();
9605
9606 // Find a matching one-use shift by constant.
9607 unsigned ShiftOpcode = Shift->getOpcode();
9608 SDValue C1 = Shift->getOperand(1);
9609 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9610 assert(C1Node && "Expected a shift with constant operand");
9611 const APInt &C1Val = C1Node->getAPIntValue();
9612 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9613 const APInt *&ShiftAmtVal) {
9614 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9615 return false;
9616
9617 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9618 if (!ShiftCNode)
9619 return false;
9620
9621 // Capture the shifted operand and shift amount value.
9622 ShiftOp = V.getOperand(0);
9623 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9624
9625 // Shift amount types do not have to match their operand type, so check that
9626 // the constants are the same width.
9627 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9628 return false;
9629
9630 // The fold is not valid if the sum of the shift values doesn't fit in the
9631 // given shift amount type.
9632 bool Overflow = false;
9633 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9634 if (Overflow)
9635 return false;
9636
9637 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9638 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9639 return false;
9640
9641 return true;
9642 };
9643
9644 // Logic ops are commutative, so check each operand for a match.
9645 SDValue X, Y;
9646 const APInt *C0Val;
9647 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9648 Y = LogicOp.getOperand(1);
9649 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9650 Y = LogicOp.getOperand(0);
9651 else
9652 return SDValue();
9653
9654 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9655 SDLoc DL(Shift);
9656 EVT VT = Shift->getValueType(0);
9657 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9658 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9659 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9660 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9661 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9662 LogicOp->getFlags());
9663}
9664
9665/// Handle transforms common to the three shifts, when the shift amount is a
9666/// constant.
9667/// We are looking for: (shift being one of shl/sra/srl)
9668/// shift (binop X, C0), C1
9669/// And want to transform into:
9670/// binop (shift X, C1), (shift C0, C1)
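/// For example: shl (or X, 0xF0), 8 --> or (shl X, 8), 0xF000, which exposes
/// the shifted constant to further folding (e.g. into address computations).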
9671SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9672 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9673
9674 // Do not turn a 'not' into a regular xor.
9675 if (isBitwiseNot(N->getOperand(0)))
9676 return SDValue();
9677
9678 // The inner binop must be one-use, since we want to replace it.
9679 SDValue LHS = N->getOperand(0);
9680 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9681 return SDValue();
9682
9683 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9684 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9685 return R;
9686
9687 // We want to pull some binops through shifts, so that we have (and (shift))
9688 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9689 // thing happens with address calculations, so it's important to canonicalize
9690 // it.
9691 switch (LHS.getOpcode()) {
9692 default:
9693 return SDValue();
9694 case ISD::OR:
9695 case ISD::XOR:
9696 case ISD::AND:
9697 break;
9698 case ISD::ADD:
9699 if (N->getOpcode() != ISD::SHL)
9700 return SDValue(); // only shl(add) not sr[al](add).
9701 break;
9702 }
9703
9704 // FIXME: disable this unless the input to the binop is a shift by a constant
9705 // or is copy/select. Enable this in other cases once we have figured out
9706 // when it is actually profitable.
9707 SDValue BinOpLHSVal = LHS.getOperand(0);
9708 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9709 BinOpLHSVal.getOpcode() == ISD::SRA ||
9710 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9711 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9712 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9713 BinOpLHSVal.getOpcode() == ISD::SELECT;
9714
9715 if (!IsShiftByConstant && !IsCopyOrSelect)
9716 return SDValue();
9717
9718 if (IsCopyOrSelect && N->hasOneUse())
9719 return SDValue();
9720
9721 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9722 SDLoc DL(N);
9723 EVT VT = N->getValueType(0);
9724 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9725 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9726 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9727 N->getOperand(1));
9728 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9729 }
9730
9731 return SDValue();
9732}
9733
9734SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9735 assert(N->getOpcode() == ISD::TRUNCATE);
9736 assert(N->getOperand(0).getOpcode() == ISD::AND);
9737
9738 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9739 EVT TruncVT = N->getValueType(0);
9740 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9741 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9742 SDValue N01 = N->getOperand(0).getOperand(1);
9743 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9744 SDLoc DL(N);
9745 SDValue N00 = N->getOperand(0).getOperand(0);
9746 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9747 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9748 AddToWorklist(Trunc00.getNode());
9749 AddToWorklist(Trunc01.getNode());
9750 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9751 }
9752 }
9753
9754 return SDValue();
9755}
9756
9757SDValue DAGCombiner::visitRotate(SDNode *N) {
9758 SDLoc dl(N);
9759 SDValue N0 = N->getOperand(0);
9760 SDValue N1 = N->getOperand(1);
9761 EVT VT = N->getValueType(0);
9762 unsigned Bitsize = VT.getScalarSizeInBits();
9763
9764 // fold (rot x, 0) -> x
9765 if (isNullOrNullSplat(N1))
9766 return N0;
9767
9768 // fold (rot x, c) -> x iff (c % BitSize) == 0
9769 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9770 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9771 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9772 return N0;
9773 }
9774
9775 // fold (rot x, c) -> (rot x, c % BitSize)
9776 bool OutOfRange = false;
9777 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9778 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9779 return true;
9780 };
9781 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9782 EVT AmtVT = N1.getValueType();
9783 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9784 if (SDValue Amt =
9785 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9786 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9787 }
9788
9789 // rot i16 X, 8 --> bswap X
9790 auto *RotAmtC = isConstOrConstSplat(N1);
9791 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9792 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9793 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9794
9795 // Simplify the operands using demanded-bits information.
9796 if (SimplifyDemandedBits(SDValue(N, 0)))
9797 return SDValue(N, 0);
9798
9799 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9800 if (N1.getOpcode() == ISD::TRUNCATE &&
9801 N1.getOperand(0).getOpcode() == ISD::AND) {
9802 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9803 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9804 }
9805
9806 unsigned NextOp = N0.getOpcode();
9807
9808 // fold (rot* (rot* x, c2), c1)
9809 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
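// e.g. for i8: (rotl (rotr x, 3), 5) -> (rotl x, 2), since ((5 - 3) + 8) % 8 == 2,
// and (rotl (rotl x, 5), 7) -> (rotl x, 4), since ((5 + 7) + 8) % 8 == 4.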
9810 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9811 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9812 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9813 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9814 EVT ShiftVT = C1->getValueType(0);
9815 bool SameSide = (N->getOpcode() == NextOp);
9816 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9817 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9818 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9819 {N1, BitsizeC});
9820 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9821 {N0.getOperand(1), BitsizeC});
9822 if (Norm1 && Norm2)
9823 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9824 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9825 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9826 {CombinedShift, BitsizeC});
9827 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9828 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9829 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9830 CombinedShiftNorm);
9831 }
9832 }
9833 }
9834 return SDValue();
9835}
9836
9837SDValue DAGCombiner::visitSHL(SDNode *N) {
9838 SDValue N0 = N->getOperand(0);
9839 SDValue N1 = N->getOperand(1);
9840 if (SDValue V = DAG.simplifyShift(N0, N1))
9841 return V;
9842
9843 SDLoc DL(N);
9844 EVT VT = N0.getValueType();
9845 EVT ShiftVT = N1.getValueType();
9846 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9847
9848 // fold (shl c1, c2) -> c1<<c2
9849 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9850 return C;
9851
9852 // fold vector ops
9853 if (VT.isVector()) {
9854 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9855 return FoldedVOp;
9856
9857 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9858 // If setcc produces all-one true value then:
9859 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9860 if (N1CV && N1CV->isConstant()) {
9861 if (N0.getOpcode() == ISD::AND) {
9862 SDValue N00 = N0->getOperand(0);
9863 SDValue N01 = N0->getOperand(1);
9864 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9865
9866 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9867 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9868 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9869 if (SDValue C =
9870 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9871 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9872 }
9873 }
9874 }
9875 }
9876
9877 if (SDValue NewSel = foldBinOpIntoSelect(N))
9878 return NewSel;
9879
9880 // if (shl x, c) is known to be zero, return 0
9881 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9882 return DAG.getConstant(0, DL, VT);
9883
9884 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9885 if (N1.getOpcode() == ISD::TRUNCATE &&
9886 N1.getOperand(0).getOpcode() == ISD::AND) {
9887 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9888 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9889 }
9890
9891 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
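// e.g. for i8: (shl (shl x, 3), 2) -> (shl x, 5), while (shl (shl x, 6), 4) -> 0
// because the combined shift amount 6 + 4 reaches the bitwidth.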
9892 if (N0.getOpcode() == ISD::SHL) {
9893 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9894 ConstantSDNode *RHS) {
9895 APInt c1 = LHS->getAPIntValue();
9896 APInt c2 = RHS->getAPIntValue();
9897 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9898 return (c1 + c2).uge(OpSizeInBits);
9899 };
9900 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9901 return DAG.getConstant(0, DL, VT);
9902
9903 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9904 ConstantSDNode *RHS) {
9905 APInt c1 = LHS->getAPIntValue();
9906 APInt c2 = RHS->getAPIntValue();
9907 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9908 return (c1 + c2).ult(OpSizeInBits);
9909 };
9910 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9911 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9912 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9913 }
9914 }
9915
9916 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9917 // For this to be valid, the second form must not preserve any of the bits
9918 // that are shifted out by the inner shift in the first form. This means
9919 // the outer shift size must be >= the number of bits added by the ext.
9920 // As a corollary, we don't care what kind of ext it is.
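// e.g. with x: i8 and VT = i32: (shl (zext (shl x, 2)), 26) -> (shl (zext x), 28);
// the zext adds 24 bits and the outer shift amount 26 >= 24, so every bit the
// inner i8 shift discarded would have been shifted out anyway.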
9921 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9922 N0.getOpcode() == ISD::ANY_EXTEND ||
9923 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9924 N0.getOperand(0).getOpcode() == ISD::SHL) {
9925 SDValue N0Op0 = N0.getOperand(0);
9926 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9927 EVT InnerVT = N0Op0.getValueType();
9928 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9929
9930 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9931 ConstantSDNode *RHS) {
9932 APInt c1 = LHS->getAPIntValue();
9933 APInt c2 = RHS->getAPIntValue();
9934 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9935 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9936 (c1 + c2).uge(OpSizeInBits);
9937 };
9938 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9939 /*AllowUndefs*/ false,
9940 /*AllowTypeMismatch*/ true))
9941 return DAG.getConstant(0, DL, VT);
9942
9943 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9944 ConstantSDNode *RHS) {
9945 APInt c1 = LHS->getAPIntValue();
9946 APInt c2 = RHS->getAPIntValue();
9947 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9948 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9949 (c1 + c2).ult(OpSizeInBits);
9950 };
9951 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9952 /*AllowUndefs*/ false,
9953 /*AllowTypeMismatch*/ true)) {
9954 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9955 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9956 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9957 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9958 }
9959 }
9960
9961 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9962 // Only fold this if the inner zext has no other uses to avoid increasing
9963 // the total number of instructions.
9964 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9965 N0.getOperand(0).getOpcode() == ISD::SRL) {
9966 SDValue N0Op0 = N0.getOperand(0);
9967 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9968
9969 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9970 APInt c1 = LHS->getAPIntValue();
9971 APInt c2 = RHS->getAPIntValue();
9972 zeroExtendToMatch(c1, c2);
9973 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9974 };
9975 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9976 /*AllowUndefs*/ false,
9977 /*AllowTypeMismatch*/ true)) {
9978 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9979 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9980 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9981 AddToWorklist(NewSHL.getNode());
9982 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9983 }
9984 }
9985
9986 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9987 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9988 ConstantSDNode *RHS) {
9989 const APInt &LHSC = LHS->getAPIntValue();
9990 const APInt &RHSC = RHS->getAPIntValue();
9991 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9992 LHSC.getZExtValue() <= RHSC.getZExtValue();
9993 };
9994
9995 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9996 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
9997 if (N0->getFlags().hasExact()) {
9998 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9999 /*AllowUndefs*/ false,
10000 /*AllowTypeMismatch*/ true)) {
10001 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10002 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10003 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10004 }
10005 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10006 /*AllowUndefs*/ false,
10007 /*AllowTypeMismatch*/ true)) {
10008 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10009 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10010 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10011 }
10012 }
10013
10014 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10015 // (and (srl x, (sub c1, c2)), MASK)
10016 // Only fold this if the inner shift has no other uses -- if it does,
10017 // folding this will increase the total number of instructions.
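// e.g. for i8: (shl (srl x, 3), 5) -> (and (shl x, 2), 0xE0), and
// (shl (srl x, 5), 3) -> (and (srl x, 2), 0x38).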
10018 if (N0.getOpcode() == ISD::SRL &&
10019 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10020 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10021 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10022 /*AllowUndefs*/ false,
10023 /*AllowTypeMismatch*/ true)) {
10024 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10025 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10026 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10027 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10028 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10029 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10030 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10031 }
10032 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10033 /*AllowUndefs*/ false,
10034 /*AllowTypeMismatch*/ true)) {
10035 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10036 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10037 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10038 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10039 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10040 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10041 }
10042 }
10043 }
10044
10045 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10046 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10047 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10048 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10049 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10050 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10051 }
10052
10053 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10054 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10055 // Variant of version done on multiply, except mul by a power of 2 is turned
10056 // into a shift.
10057 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10058 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10059 SDValue N01 = N0.getOperand(1);
10060 if (SDValue Shl1 =
10061 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10062 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10063 AddToWorklist(Shl0.getNode());
10064 SDNodeFlags Flags;
10065 // Preserve the disjoint flag for Or.
10066 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10067 Flags.setDisjoint(true);
10068 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10069 }
10070 }
10071
10072 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10073 // TODO: Add zext/add_nuw variant with suitable test coverage
10074 // TODO: Should we limit this with isLegalAddImmediate?
10075 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10076 N0.getOperand(0).getOpcode() == ISD::ADD &&
10077 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10078 N0.getOperand(0)->hasOneUse() &&
10079 TLI.isDesirableToCommuteWithShift(N, Level)) {
10080 SDValue Add = N0.getOperand(0);
10081 SDLoc DL(N0);
10082 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10083 {Add.getOperand(1)})) {
10084 if (SDValue ShlC =
10085 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10086 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10087 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10088 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10089 }
10090 }
10091 }
10092
10093 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10094 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10095 SDValue N01 = N0.getOperand(1);
10096 if (SDValue Shl =
10097 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10098 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10099 }
10100
10101 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10102 if (N1C && !N1C->isOpaque())
10103 if (SDValue NewSHL = visitShiftByConstant(N))
10104 return NewSHL;
10105
10106 // fold (shl X, cttz(Y)) -> (mul (Y & -Y), X) if cttz is unsupported on the
10107 // target.
10108 if (((N1.getOpcode() == ISD::CTTZ &&
10109 VT.getScalarSizeInBits() <= ShiftVT.getScalarSizeInBits()) ||
10110 N1.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
10111 N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::CTTZ, ShiftVT) &&
10112 TLI.isOperationLegalOrCustom(ISD::MUL, VT)) {
10113 SDValue Y = N1.getOperand(0);
10114 SDLoc DL(N);
10115 SDValue NegY = DAG.getNegative(Y, DL, ShiftVT);
10116 SDValue And =
10117 DAG.getZExtOrTrunc(DAG.getNode(ISD::AND, DL, ShiftVT, Y, NegY), DL, VT);
10118 return DAG.getNode(ISD::MUL, DL, VT, And, N0);
10119 }
10120
10121 if (SimplifyDemandedBits(SDValue(N, 0)))
10122 return SDValue(N, 0);
10123
10124 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10125 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10126 const APInt &C0 = N0.getConstantOperandAPInt(0);
10127 const APInt &C1 = N1C->getAPIntValue();
10128 return DAG.getVScale(DL, VT, C0 << C1);
10129 }
10130
10131 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10132 APInt ShlVal;
10133 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10134 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10135 const APInt &C0 = N0.getConstantOperandAPInt(0);
10136 if (ShlVal.ult(C0.getBitWidth())) {
10137 APInt NewStep = C0 << ShlVal;
10138 return DAG.getStepVector(DL, VT, NewStep);
10139 }
10140 }
10141
10142 return SDValue();
10143}
10144
10145// Transform a right shift of a multiply into a multiply-high.
10146// Examples:
10147 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10148 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10149 static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10150 const TargetLowering &TLI) {
10151 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10152 "SRL or SRA node is required here!");
10153
10154 // Check the shift amount. Proceed with the transformation if the shift
10155 // amount is constant.
10156 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10157 if (!ShiftAmtSrc)
10158 return SDValue();
10159
10160 // The operation feeding into the shift must be a multiply.
10161 SDValue ShiftOperand = N->getOperand(0);
10162 if (ShiftOperand.getOpcode() != ISD::MUL)
10163 return SDValue();
10164
10165 // Both operands must be equivalent extend nodes.
10166 SDValue LeftOp = ShiftOperand.getOperand(0);
10167 SDValue RightOp = ShiftOperand.getOperand(1);
10168
10169 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10170 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10171
10172 if (!IsSignExt && !IsZeroExt)
10173 return SDValue();
10174
10175 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10176 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10177
10178 // return true if U may use the lower bits of its operands
10179 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10180 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10181 return true;
10182 }
10183 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10184 if (!UShiftAmtSrc) {
10185 return true;
10186 }
10187 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10188 return UShiftAmt < NarrowVTSize;
10189 };
10190
10191 // If the lower part of the MUL is also used and MUL_LOHI is supported
10192 // do not introduce the MULH in favor of MUL_LOHI
10193 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10194 if (!ShiftOperand.hasOneUse() &&
10195 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10196 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10197 return SDValue();
10198 }
10199
10200 SDValue MulhRightOp;
10201 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10202 unsigned ActiveBits = IsSignExt
10203 ? Constant->getAPIntValue().getSignificantBits()
10204 : Constant->getAPIntValue().getActiveBits();
10205 if (ActiveBits > NarrowVTSize)
10206 return SDValue();
10207 MulhRightOp = DAG.getConstant(
10208 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10209 NarrowVT);
10210 } else {
10211 if (LeftOp.getOpcode() != RightOp.getOpcode())
10212 return SDValue();
10213 // Check that the two extend nodes are the same type.
10214 if (NarrowVT != RightOp.getOperand(0).getValueType())
10215 return SDValue();
10216 MulhRightOp = RightOp.getOperand(0);
10217 }
10218
10219 EVT WideVT = LeftOp.getValueType();
10220 // Proceed with the transformation if the wide types match.
10221 assert((WideVT == RightOp.getValueType()) &&
10222 "Cannot have a multiply node with two different operand types.");
10223
10224 // Proceed with the transformation if the wide type is twice as large
10225 // as the narrow type.
10226 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10227 return SDValue();
10228
10229 // Check the shift amount with the narrow type size.
10230 // Proceed with the transformation if the shift amount is the width
10231 // of the narrow type.
10232 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10233 if (ShiftAmt != NarrowVTSize)
10234 return SDValue();
10235
10236 // If the operation feeding into the MUL is a sign extend (sext),
10237 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10238 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10239
10240 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
10241 // If it is a vector type, we could instead transform to an acceptable type
10242 // and rely on legalization to split/combine the result.
10243 if (NarrowVT.isVector()) {
10244 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10245 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10246 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10247 return SDValue();
10248 } else {
10249 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10250 return SDValue();
10251 }
10252
10253 SDValue Result =
10254 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10255 bool IsSigned = N->getOpcode() == ISD::SRA;
10256 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10257}
10258
10259// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10260 // This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10261 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10262 unsigned Opcode = N->getOpcode();
10263 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10264 return SDValue();
10265
10266 SDValue N0 = N->getOperand(0);
10267 EVT VT = N->getValueType(0);
10268 SDLoc DL(N);
10269 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10270 SDValue OldLHS = N0.getOperand(0);
10271 SDValue OldRHS = N0.getOperand(1);
10272
10273 // If both operands are bswap/bitreverse, ignore the multiuse restriction.
10274 // Otherwise, ensure the logic_op and bswap/bitreverse(x) each have one use.
10275 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10276 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10277 OldRHS.getOperand(0));
10278 }
10279
10280 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10281 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10282 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10283 NewBitReorder);
10284 }
10285
10286 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10287 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10288 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10289 OldRHS.getOperand(0));
10290 }
10291 }
10292 return SDValue();
10293}
10294
10295SDValue DAGCombiner::visitSRA(SDNode *N) {
10296 SDValue N0 = N->getOperand(0);
10297 SDValue N1 = N->getOperand(1);
10298 if (SDValue V = DAG.simplifyShift(N0, N1))
10299 return V;
10300
10301 SDLoc DL(N);
10302 EVT VT = N0.getValueType();
10303 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10304
10305 // fold (sra c1, c2) -> c1 >>s c2
10306 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10307 return C;
10308
10309 // Arithmetic shifting an all-sign-bit value is a no-op.
10310 // fold (sra 0, x) -> 0
10311 // fold (sra -1, x) -> -1
10312 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10313 return N0;
10314
10315 // fold vector ops
10316 if (VT.isVector())
10317 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10318 return FoldedVOp;
10319
10320 if (SDValue NewSel = foldBinOpIntoSelect(N))
10321 return NewSel;
10322
10323 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10324
10325 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10326 // clamp (add c1, c2) to max shift.
10327 if (N0.getOpcode() == ISD::SRA) {
10328 EVT ShiftVT = N1.getValueType();
10329 EVT ShiftSVT = ShiftVT.getScalarType();
10330 SmallVector<SDValue, 16> ShiftValues;
10331
10332 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10333 APInt c1 = LHS->getAPIntValue();
10334 APInt c2 = RHS->getAPIntValue();
10335 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10336 APInt Sum = c1 + c2;
10337 unsigned ShiftSum =
10338 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10339 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10340 return true;
10341 };
10342 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10343 SDValue ShiftValue;
10344 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10345 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10346 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10347 assert(ShiftValues.size() == 1 &&
10348 "Expected matchBinaryPredicate to return one element for "
10349 "SPLAT_VECTORs");
10350 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10351 } else
10352 ShiftValue = ShiftValues[0];
10353 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10354 }
10355 }
10356
10357 // fold (sra (shl X, m), (sub result_size, n))
10358 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10359 // result_size - n != m.
10360 // If truncate is free for the target, sext(shl) is likely to result in
10361 // better code.
10362 if (N0.getOpcode() == ISD::SHL && N1C) {
10363 // Get the two constants of the shifts, CN0 = m, CN = n.
10364 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10365 if (N01C) {
10366 LLVMContext &Ctx = *DAG.getContext();
10367 // Determine what the truncate's result bitsize and type would be.
10368 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10369
10370 if (VT.isVector())
10371 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10372
10373 // Determine the residual right-shift amount.
10374 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10375
10376 // If the shift is not a no-op (in which case this should be just a sign
10377 // extend already), the truncated-to type is legal, sign_extend is legal
10378 // on that type, and the truncate to that type is both legal and free,
10379 // perform the transform.
10380 if ((ShiftAmt > 0) &&
10381 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10382 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10383 TLI.isTruncateFree(VT, TruncVT)) {
10384 SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
10385 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10386 N0.getOperand(0), Amt);
10387 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10388 Shift);
10389 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10390 N->getValueType(0), Trunc);
10391 }
10392 }
10393 }
10394
10395 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10396 // sra (add (shl X, N1C), AddC), N1C -->
10397 // sext (add (trunc X to (width - N1C)), AddC')
10398 // sra (sub AddC, (shl X, N1C)), N1C -->
10399 // sext (sub AddC1',(trunc X to (width - N1C)))
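// e.g. for i32 with N1C = 16 and AddC = 0x30000:
// sra (add (shl X, 16), 0x30000), 16 --> sext (add (trunc X to i16), 3),
// where 3 is AddC shifted right by 16 and truncated to i16.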
10400 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10401 N0.hasOneUse()) {
10402 bool IsAdd = N0.getOpcode() == ISD::ADD;
10403 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10404 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10405 Shl.hasOneUse()) {
10406 // TODO: AddC does not need to be a splat.
10407 if (ConstantSDNode *AddC =
10408 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10409 // Determine what the truncate's type would be and ask the target if
10410 // that is a free operation.
10411 LLVMContext &Ctx = *DAG.getContext();
10412 unsigned ShiftAmt = N1C->getZExtValue();
10413 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10414 if (VT.isVector())
10415 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10416
10417 // TODO: The simple type check probably belongs in the default hook
10418 // implementation and/or target-specific overrides (because
10419 // non-simple types likely require masking when legalized), but
10420 // that restriction may conflict with other transforms.
10421 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10422 TLI.isTruncateFree(VT, TruncVT)) {
10423 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10424 SDValue ShiftC =
10425 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10426 TruncVT.getScalarSizeInBits()),
10427 DL, TruncVT);
10428 SDValue Add;
10429 if (IsAdd)
10430 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10431 else
10432 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10433 return DAG.getSExtOrTrunc(Add, DL, VT);
10434 }
10435 }
10436 }
10437 }
10438
10439 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10440 if (N1.getOpcode() == ISD::TRUNCATE &&
10441 N1.getOperand(0).getOpcode() == ISD::AND) {
10442 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10443 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10444 }
10445
10446 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10447 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10448 // if c1 is equal to the number of bits the trunc removes
10449 // TODO - support non-uniform vector shift amounts.
10450 if (N0.getOpcode() == ISD::TRUNCATE &&
10451 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10452 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10453 N0.getOperand(0).hasOneUse() &&
10454 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10455 SDValue N0Op0 = N0.getOperand(0);
10456 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10457 EVT LargeVT = N0Op0.getValueType();
10458 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10459 if (LargeShift->getAPIntValue() == TruncBits) {
10460 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10461 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10462 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10463 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10464 SDValue SRA =
10465 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10466 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10467 }
10468 }
10469 }
10470
10471 // Simplify, based on bits shifted out of the LHS.
10472 if (SimplifyDemandedBits(SDValue(N, 0)))
10473 return SDValue(N, 0);
10474
10475 // If the sign bit is known to be zero, switch this to a SRL.
10476 if (DAG.SignBitIsZero(N0))
10477 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10478
10479 if (N1C && !N1C->isOpaque())
10480 if (SDValue NewSRA = visitShiftByConstant(N))
10481 return NewSRA;
10482
10483 // Try to transform this shift into a multiply-high if
10484 // it matches the appropriate pattern detected in combineShiftToMULH.
10485 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10486 return MULH;
10487
10488 // Attempt to convert a sra of a load into a narrower sign-extending load.
10489 if (SDValue NarrowLoad = reduceLoadWidth(N))
10490 return NarrowLoad;
10491
10492 return SDValue();
10493}
10494
10495SDValue DAGCombiner::visitSRL(SDNode *N) {
10496 SDValue N0 = N->getOperand(0);
10497 SDValue N1 = N->getOperand(1);
10498 if (SDValue V = DAG.simplifyShift(N0, N1))
10499 return V;
10500
10501 SDLoc DL(N);
10502 EVT VT = N0.getValueType();
10503 EVT ShiftVT = N1.getValueType();
10504 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10505
10506 // fold (srl c1, c2) -> c1 >>u c2
10507 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10508 return C;
10509
10510 // fold vector ops
10511 if (VT.isVector())
10512 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10513 return FoldedVOp;
10514
10515 if (SDValue NewSel = foldBinOpIntoSelect(N))
10516 return NewSel;
10517
10518 // if (srl x, c) is known to be zero, return 0
10519 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10520 if (N1C &&
10521 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10522 return DAG.getConstant(0, DL, VT);
10523
10524 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10525 if (N0.getOpcode() == ISD::SRL) {
10526 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10527 ConstantSDNode *RHS) {
10528 APInt c1 = LHS->getAPIntValue();
10529 APInt c2 = RHS->getAPIntValue();
10530 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10531 return (c1 + c2).uge(OpSizeInBits);
10532 };
10533 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10534 return DAG.getConstant(0, DL, VT);
10535
10536 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10537 ConstantSDNode *RHS) {
10538 APInt c1 = LHS->getAPIntValue();
10539 APInt c2 = RHS->getAPIntValue();
10540 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10541 return (c1 + c2).ult(OpSizeInBits);
10542 };
10543 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10544 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10545 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10546 }
10547 }
10548
10549 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10550 N0.getOperand(0).getOpcode() == ISD::SRL) {
10551 SDValue InnerShift = N0.getOperand(0);
10552 // TODO - support non-uniform vector shift amounts.
10553 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10554 uint64_t c1 = N001C->getZExtValue();
10555 uint64_t c2 = N1C->getZExtValue();
10556 EVT InnerShiftVT = InnerShift.getValueType();
10557 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10558 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10559 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10560 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10561 if (c1 + OpSizeInBits == InnerShiftSize) {
10562 if (c1 + c2 >= InnerShiftSize)
10563 return DAG.getConstant(0, DL, VT);
10564 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10565 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10566 InnerShift.getOperand(0), NewShiftAmt);
10567 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10568 }
10569 // In the more general case, we can clear the high bits after the shift:
10570 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10571 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10572 c1 + c2 < InnerShiftSize) {
10573 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10574 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10575 InnerShift.getOperand(0), NewShiftAmt);
10576 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10577 OpSizeInBits - c2),
10578 DL, InnerShiftVT);
10579 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10580 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10581 }
10582 }
10583 }
10584
10585 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10586 // (and (srl x, (sub c2, c1)), MASK)
10587 if (N0.getOpcode() == ISD::SHL &&
10588 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10589 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10590 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10591 ConstantSDNode *RHS) {
10592 const APInt &LHSC = LHS->getAPIntValue();
10593 const APInt &RHSC = RHS->getAPIntValue();
10594 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10595 LHSC.getZExtValue() <= RHSC.getZExtValue();
10596 };
10597 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10598 /*AllowUndefs*/ false,
10599 /*AllowTypeMismatch*/ true)) {
10600 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10601 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10602 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10603 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10604 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10605 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10606 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10607 }
10608 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10609 /*AllowUndefs*/ false,
10610 /*AllowTypeMismatch*/ true)) {
10611 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10612 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10613 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10614 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10615 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10616 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10617 }
10618 }
10619
10620 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10621 // TODO - support non-uniform vector shift amounts.
10622 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10623 // Shifting in all undef bits?
10624 EVT SmallVT = N0.getOperand(0).getValueType();
10625 unsigned BitSize = SmallVT.getScalarSizeInBits();
10626 if (N1C->getAPIntValue().uge(BitSize))
10627 return DAG.getUNDEF(VT);
10628
10629 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10630 uint64_t ShiftAmt = N1C->getZExtValue();
10631 SDLoc DL0(N0);
10632 SDValue SmallShift =
10633 DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0),
10634 DAG.getShiftAmountConstant(ShiftAmt, SmallVT, DL0));
10635 AddToWorklist(SmallShift.getNode());
10636 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10637 return DAG.getNode(ISD::AND, DL, VT,
10638 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10639 DAG.getConstant(Mask, DL, VT));
10640 }
10641 }
10642
10643 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10644 // bit, which is unmodified by sra.
10645 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10646 if (N0.getOpcode() == ISD::SRA)
10647 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10648 }
10649
10650 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10651 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
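// e.g. for i32, if only bit 3 of x can possibly be set, (srl (ctlz x), 5) is 1
// when x == 0 and 0 when bit 3 is set, which is exactly ((x >> 3) ^ 1).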
10652 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10653 isPowerOf2_32(OpSizeInBits) &&
10654 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10655 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10656
10657 // If any of the input bits are KnownOne, then the input couldn't be all
10658 // zeros, thus the result of the srl will always be zero.
10659 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10660
10661 // If all of the bits input to the ctlz node are known to be zero, then
10662 // the result of the ctlz is "32" and the result of the shift is one.
10663 APInt UnknownBits = ~Known.Zero;
10664 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10665
10666 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10667 if (UnknownBits.isPowerOf2()) {
10668 // Okay, we know that only the single bit specified by UnknownBits
10669 // could be set on input to the CTLZ node. If this bit is set, the SRL
10670 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10671 // to an SRL/XOR pair, which is likely to simplify more.
10672 unsigned ShAmt = UnknownBits.countr_zero();
10673 SDValue Op = N0.getOperand(0);
10674
10675 if (ShAmt) {
10676 SDLoc DL(N0);
10677 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10678 DAG.getShiftAmountConstant(ShAmt, VT, DL));
10679 AddToWorklist(Op.getNode());
10680 }
10681 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10682 }
10683 }
10684
10685 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10686 if (N1.getOpcode() == ISD::TRUNCATE &&
10687 N1.getOperand(0).getOpcode() == ISD::AND) {
10688 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10689 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10690 }
10691
10692 // fold operands of srl based on knowledge that the low bits are not
10693 // demanded.
10694 if (SimplifyDemandedBits(SDValue(N, 0)))
10695 return SDValue(N, 0);
10696
10697 if (N1C && !N1C->isOpaque())
10698 if (SDValue NewSRL = visitShiftByConstant(N))
10699 return NewSRL;
10700
10701 // Attempt to convert a srl of a load into a narrower zero-extending load.
10702 if (SDValue NarrowLoad = reduceLoadWidth(N))
10703 return NarrowLoad;
10704
10705 // Here is a common situation. We want to optimize:
10706 //
10707 // %a = ...
10708 // %b = and i32 %a, 2
10709 // %c = srl i32 %b, 1
10710 // brcond i32 %c ...
10711 //
10712 // into
10713 //
10714 // %a = ...
10715 // %b = and %a, 2
10716 // %c = setcc eq %b, 0
10717 // brcond %c ...
10718 //
10719 // However, after the source operand of SRL is optimized into AND, the SRL
10720 // itself may not be optimized further. Look for it and add the BRCOND into
10721 // the worklist.
10722 //
10723 // This also tends to happen for binary operations when SimplifyDemandedBits
10724 // is involved.
10725 //
10726 // FIXME: This is unnecessary if we process the DAG in topological order,
10727 // which we plan to do. This workaround can be removed once the DAG is
10728 // processed in topological order.
10729 if (N->hasOneUse()) {
10730 SDNode *Use = *N->use_begin();
10731
10732 // Look past the truncate.
10733 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10734 Use = *Use->use_begin();
10735
10736 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10737 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10738 AddToWorklist(Use);
10739 }
10740
10741 // Try to transform this shift into a multiply-high if
10742 // it matches the appropriate pattern detected in combineShiftToMULH.
10743 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10744 return MULH;
10745
10746 return SDValue();
10747}
10748
10749SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10750 EVT VT = N->getValueType(0);
10751 SDValue N0 = N->getOperand(0);
10752 SDValue N1 = N->getOperand(1);
10753 SDValue N2 = N->getOperand(2);
10754 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10755 unsigned BitWidth = VT.getScalarSizeInBits();
10756 SDLoc DL(N);
10757
10758 // fold (fshl N0, N1, 0) -> N0
10759 // fold (fshr N0, N1, 0) -> N1
10760 if (isPowerOf2_32(BitWidth))
10761 if (DAG.MaskedValueIsZero(
10762 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10763 return IsFSHL ? N0 : N1;
10764
10765 auto IsUndefOrZero = [](SDValue V) {
10766 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10767 };
10768
10769 // TODO - support non-uniform vector shift amounts.
10770 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10771 EVT ShAmtTy = N2.getValueType();
10772
10773 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10774 if (Cst->getAPIntValue().uge(BitWidth)) {
10775 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10776 return DAG.getNode(N->getOpcode(), DL, VT, N0, N1,
10777 DAG.getConstant(RotAmt, DL, ShAmtTy));
10778 }
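// Illustrative example (editorial addition, not from the original source):
// for i8 operands the amount is reduced modulo the bit width, e.g.
//   fshl(i8 x, y, 11)  -->  fshl(i8 x, y, 3)   since 11 urem 8 == 3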
10779
10780 unsigned ShAmt = Cst->getZExtValue();
10781 if (ShAmt == 0)
10782 return IsFSHL ? N0 : N1;
10783
10784 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10785 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10786 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10787 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
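// Illustrative example (editorial addition, not from the original source),
// assuming i32 operands: fshl(x, y, c) == (x << c) | (y >> (32 - c)), so a
// zero/undef operand leaves a plain shift of the other one, e.g.
//   fshl(0, y, 8)  -->  lshr(y, 24)        fshl(x, 0, 8)  -->  shl(x, 8)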
10788 if (IsUndefOrZero(N0))
10789 return DAG.getNode(
10790 ISD::SRL, DL, VT, N1,
10791 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, DL, ShAmtTy));
10792 if (IsUndefOrZero(N1))
10793 return DAG.getNode(
10794 ISD::SHL, DL, VT, N0,
10795 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, DL, ShAmtTy));
10796
10797 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10798 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10799 // TODO - bigendian support once we have test coverage.
10800 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10801 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10802 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10803 !DAG.getDataLayout().isBigEndian()) {
10804 auto *LHS = dyn_cast<LoadSDNode>(N0);
10805 auto *RHS = dyn_cast<LoadSDNode>(N1);
10806 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10807 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10808 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10809 ISD::isNON_EXTLoad(LHS)) {
10810 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10811 SDLoc DL(RHS);
10812 uint64_t PtrOff =
10813 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10814 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10815 unsigned Fast = 0;
10816 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10817 RHS->getAddressSpace(), NewAlign,
10818 RHS->getMemOperand()->getFlags(), &Fast) &&
10819 Fast) {
10820 SDValue NewPtr = DAG.getMemBasePlusOffset(
10821 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10822 AddToWorklist(NewPtr.getNode());
10823 SDValue Load = DAG.getLoad(
10824 VT, DL, RHS->getChain(), NewPtr,
10825 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10826 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10827 // Replace the old load's chain with the new load's chain.
10828 WorklistRemover DeadNodes(*this);
10829 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10830 return Load;
10831 }
10832 }
10833 }
10834 }
10835 }
10836
10837 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10838 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10839 // iff we know the shift amount is in range.
10840 // TODO: when is it worth doing SUB(BW, N2) as well?
10841 if (isPowerOf2_32(BitWidth)) {
10842 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10843 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10844 return DAG.getNode(ISD::SRL, DL, VT, N1, N2);
10845 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10846 return DAG.getNode(ISD::SHL, DL, VT, N0, N2);
10847 }
10848
10849 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10850 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10851 // TODO: Investigate flipping this rotate if only one is legal.
10852 // If funnel shift is legal as well we might be better off avoiding
10853 // non-constant (BW - N2).
10854 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10855 if (N0 == N1 && hasOperation(RotOpc, VT))
10856 return DAG.getNode(RotOpc, DL, VT, N0, N2);
10857
10858 // Simplify, based on bits shifted out of N0/N1.
10859 if (SimplifyDemandedBits(SDValue(N, 0)))
10860 return SDValue(N, 0);
10861
10862 return SDValue();
10863}
10864
10865SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10866 SDValue N0 = N->getOperand(0);
10867 SDValue N1 = N->getOperand(1);
10868 if (SDValue V = DAG.simplifyShift(N0, N1))
10869 return V;
10870
10871 SDLoc DL(N);
10872 EVT VT = N0.getValueType();
10873
10874 // fold (*shlsat c1, c2) -> c1<<c2
10875 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10876 return C;
10877
10878 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10879 
10880 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10881 // fold (sshlsat x, c) -> (shl x, c)
10882 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10883 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10884 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10885
10886 // fold (ushlsat x, c) -> (shl x, c)
10887 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10888 N1C->getAPIntValue().ule(
10889 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10890 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10891 }
10892
10893 return SDValue();
10894}
10895
10896 // Given an ABS node, detect the following patterns:
10897// (ABS (SUB (EXTEND a), (EXTEND b))).
10898// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10899 // Generates a UABD/SABD instruction.
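// Illustrative example (editorial addition, not from the original source):
// for i8 values a and b widened to i32,
//   abs(sext(a) - sext(b))  -->  zext(abds(a, b))
//   abs(zext(a) - zext(b))  -->  zext(abdu(a, b))
// The widened subtraction cannot wrap, so the absolute difference still fits
// in the narrow type as an unsigned value.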
10900SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10901 EVT SrcVT = N->getValueType(0);
10902
10903 if (N->getOpcode() == ISD::TRUNCATE)
10904 N = N->getOperand(0).getNode();
10905
10906 if (N->getOpcode() != ISD::ABS)
10907 return SDValue();
10908
10909 EVT VT = N->getValueType(0);
10910 SDValue AbsOp1 = N->getOperand(0);
10911 SDValue Op0, Op1;
10912
10913 if (AbsOp1.getOpcode() != ISD::SUB)
10914 return SDValue();
10915
10916 Op0 = AbsOp1.getOperand(0);
10917 Op1 = AbsOp1.getOperand(1);
10918
10919 unsigned Opc0 = Op0.getOpcode();
10920
10921 // Check if the operands of the sub are (zero|sign)-extended.
10922 // TODO: Should we use ValueTracking instead?
10923 if (Opc0 != Op1.getOpcode() ||
10924 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10925 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10926 // fold (abs (sub nsw x, y)) -> abds(x, y)
10927 // Don't fold this for unsupported types as we lose the NSW handling.
10928 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10929 TLI.preferABDSToABSWithNSW(VT)) {
10930 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10931 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10932 }
10933 return SDValue();
10934 }
10935
10936 EVT VT0, VT1;
10937 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10938 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10939 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10940 } else {
10941 VT0 = Op0.getOperand(0).getValueType();
10942 VT1 = Op1.getOperand(0).getValueType();
10943 }
10944 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10945
10946 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10947 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10948 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10949 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10950 (VT1 == MaxVT || Op1->hasOneUse()) &&
10951 (!LegalTypes || hasOperation(ABDOpcode, MaxVT))) {
10952 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10953 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10954 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10955 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10956 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10957 }
10958
10959 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10960 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10961 if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
10962 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10963 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10964 }
10965
10966 return SDValue();
10967}
10968
10969SDValue DAGCombiner::visitABS(SDNode *N) {
10970 SDValue N0 = N->getOperand(0);
10971 EVT VT = N->getValueType(0);
10972 SDLoc DL(N);
10973
10974 // fold (abs c1) -> c2
10975 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10976 return C;
10977 // fold (abs (abs x)) -> (abs x)
10978 if (N0.getOpcode() == ISD::ABS)
10979 return N0;
10980 // fold (abs x) -> x iff not-negative
10981 if (DAG.SignBitIsZero(N0))
10982 return N0;
10983
10984 if (SDValue ABD = foldABSToABD(N, DL))
10985 return ABD;
10986
10987 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10988 // iff zero_extend/truncate are free.
10989 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10990 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10991 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10992 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10993 hasOperation(ISD::ABS, ExtVT)) {
10994 return DAG.getNode(
10995 ISD::ZERO_EXTEND, DL, VT,
10996 DAG.getNode(ISD::ABS, DL, ExtVT,
10997 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10998 }
10999 }
11000
11001 return SDValue();
11002}
11003
11004SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11005 SDValue N0 = N->getOperand(0);
11006 EVT VT = N->getValueType(0);
11007 SDLoc DL(N);
11008
11009 // fold (bswap c1) -> c2
11010 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11011 return C;
11012 // fold (bswap (bswap x)) -> x
11013 if (N0.getOpcode() == ISD::BSWAP)
11014 return N0.getOperand(0);
11015
11016 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11017 // isn't supported, it will be expanded to bswap followed by a manual reversal
11018 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11019 // the two bswaps if the bitreverse gets expanded.
11020 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11021 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11022 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11023 }
11024
11025 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11026 // iff c >= bw/2 (i.e. the lower half is known to be zero)
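// Illustrative example (editorial addition, not from the original source):
// for i64 with c == 48, only the top two bytes of (x << 48) are nonzero, so
//   bswap(x << 48)  -->  zext(bswap(trunc i32 (x << 16)))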
11027 unsigned BW = VT.getScalarSizeInBits();
11028 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11029 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11030 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11031 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11032 ShAmt->getZExtValue() >= (BW / 2) &&
11033 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11034 TLI.isTruncateFree(VT, HalfVT) &&
11035 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11036 SDValue Res = N0.getOperand(0);
11037 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11038 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11039 DAG.getShiftAmountConstant(NewShAmt, VT, DL));
11040 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11041 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11042 return DAG.getZExtOrTrunc(Res, DL, VT);
11043 }
11044 }
11045
11046 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11047 // inverse-shift-of-bswap:
11048 // bswap (X u<< C) --> (bswap X) u>> C
11049 // bswap (X u>> C) --> (bswap X) u<< C
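// Editorial rationale sketch (an assumption, not from the original source):
// a shift by a whole number of bytes only moves bytes around, so it commutes
// with the byte reversal once the shift direction is flipped, e.g. for i32
//   bswap(x << 16)  -->  (bswap x) >> 16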
11050 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11051 N0.hasOneUse()) {
11052 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11053 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11054 ShAmt->getZExtValue() % 8 == 0) {
11055 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11056 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11057 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11058 }
11059 }
11060
11061 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11062 return V;
11063
11064 return SDValue();
11065}
11066
11067SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11068 SDValue N0 = N->getOperand(0);
11069 EVT VT = N->getValueType(0);
11070 SDLoc DL(N);
11071
11072 // fold (bitreverse c1) -> c2
11073 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11074 return C;
11075
11076 // fold (bitreverse (bitreverse x)) -> x
11077 if (N0.getOpcode() == ISD::BITREVERSE)
11078 return N0.getOperand(0);
11079
11080 SDValue X, Y;
11081
11082 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11083 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11085 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11086
11087 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11088 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11090 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11091
11092 return SDValue();
11093}
11094
11095SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11096 SDValue N0 = N->getOperand(0);
11097 EVT VT = N->getValueType(0);
11098 SDLoc DL(N);
11099
11100 // fold (ctlz c1) -> c2
11101 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11102 return C;
11103
11104 // If the value is known never to be zero, switch to the undef version.
11105 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11106 if (DAG.isKnownNeverZero(N0))
11107 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11108
11109 return SDValue();
11110}
11111
11112SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11113 SDValue N0 = N->getOperand(0);
11114 EVT VT = N->getValueType(0);
11115 SDLoc DL(N);
11116
11117 // fold (ctlz_zero_undef c1) -> c2
11118 if (SDValue C =
11119 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11120 return C;
11121 return SDValue();
11122}
11123
11124SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11125 SDValue N0 = N->getOperand(0);
11126 EVT VT = N->getValueType(0);
11127 SDLoc DL(N);
11128
11129 // fold (cttz c1) -> c2
11130 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11131 return C;
11132
11133 // If the value is known never to be zero, switch to the undef version.
11134 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11135 if (DAG.isKnownNeverZero(N0))
11136 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11137
11138 return SDValue();
11139}
11140
11141SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11142 SDValue N0 = N->getOperand(0);
11143 EVT VT = N->getValueType(0);
11144 SDLoc DL(N);
11145
11146 // fold (cttz_zero_undef c1) -> c2
11147 if (SDValue C =
11148 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11149 return C;
11150 return SDValue();
11151}
11152
11153SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11154 SDValue N0 = N->getOperand(0);
11155 EVT VT = N->getValueType(0);
11156 unsigned NumBits = VT.getScalarSizeInBits();
11157 SDLoc DL(N);
11158
11159 // fold (ctpop c1) -> c2
11160 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11161 return C;
11162
11163 // If the source is being shifted, but the shift doesn't affect any active bits,
11164 // then we can call CTPOP on the shift source directly.
11165 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11166 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11167 const APInt &Amt = AmtC->getAPIntValue();
11168 if (Amt.ult(NumBits)) {
11169 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11170 if ((N0.getOpcode() == ISD::SRL &&
11171 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11172 (N0.getOpcode() == ISD::SHL &&
11173 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11174 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11175 }
11176 }
11177 }
11178 }
11179
11180 // If the upper bits are known to be zero, then see if it's profitable to
11181 // only count the lower bits.
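// Illustrative example (editorial addition, not from the original source):
// for an i64 value whose top 32 bits are known zero,
//   ctpop(x)  -->  zext(ctpop(trunc i32 x))
// which helps when the target only handles the narrower ctpop cheaply.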
11182 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11183 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11184 if (hasOperation(ISD::CTPOP, HalfVT) &&
11185 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11186 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11187 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11188 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11189 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11190 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11191 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11192 }
11193 }
11194 }
11195
11196 return SDValue();
11197}
11198
11199 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11200 SDValue RHS, const SDNodeFlags Flags,
11201 const TargetLowering &TLI) {
11202 EVT VT = LHS.getValueType();
11203 if (!VT.isFloatingPoint())
11204 return false;
11205
11206 const TargetOptions &Options = DAG.getTarget().Options;
11207
11208 return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11210 (Flags.hasNoNaNs() ||
11211 (DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
11212}
11213
11214 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11215 SDValue RHS, SDValue True, SDValue False,
11216 ISD::CondCode CC,
11217 const TargetLowering &TLI,
11218 SelectionDAG &DAG) {
11219 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11220 switch (CC) {
11221 case ISD::SETOLT:
11222 case ISD::SETOLE:
11223 case ISD::SETLT:
11224 case ISD::SETLE:
11225 case ISD::SETULT:
11226 case ISD::SETULE: {
11227 // Since the operands are already known never to be NaN here, either fminnum
11228 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11229 // expanded in terms of it.
11230 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11231 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11232 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11233
11234 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11235 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11236 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11237 return SDValue();
11238 }
11239 case ISD::SETOGT:
11240 case ISD::SETOGE:
11241 case ISD::SETGT:
11242 case ISD::SETGE:
11243 case ISD::SETUGT:
11244 case ISD::SETUGE: {
11245 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11246 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11247 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11248
11249 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11250 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11251 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11252 return SDValue();
11253 }
11254 default:
11255 return SDValue();
11256 }
11257}
11258
11259/// Generate Min/Max node
11260SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11261 SDValue RHS, SDValue True,
11262 SDValue False, ISD::CondCode CC) {
11263 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11264 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11265
11266 // If we can't directly match this, try to see if we can pull an fneg out of
11267 // the select.
11268 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11269 True, DAG, LegalOperations, ForCodeSize);
11270 if (!NegTrue)
11271 return SDValue();
11272
11273 HandleSDNode NegTrueHandle(NegTrue);
11274
11275 // Try to unfold an fneg from the select if we are comparing the negated
11276 // constant.
11277 //
11278 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11279 //
11280 // TODO: Handle fabs
11281 if (LHS == NegTrue) {
11282 // If we can't directly match this, try to see if we can pull an fneg out of
11283 // the select.
11284 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11285 RHS, DAG, LegalOperations, ForCodeSize);
11286 if (NegRHS) {
11287 HandleSDNode NegRHSHandle(NegRHS);
11288 if (NegRHS == False) {
11289 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11290 False, CC, TLI, DAG);
11291 if (Combined)
11292 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11293 }
11294 }
11295 }
11296
11297 return SDValue();
11298}
11299
11300/// If a (v)select has a condition value that is a sign-bit test, try to smear
11301/// the condition operand sign-bit across the value width and use it as a mask.
11302 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11303 SelectionDAG &DAG) {
11304 SDValue Cond = N->getOperand(0);
11305 SDValue C1 = N->getOperand(1);
11306 SDValue C2 = N->getOperand(2);
11307 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11308 return SDValue();
11309
11310 EVT VT = N->getValueType(0);
11311 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11312 VT != Cond.getOperand(0).getValueType())
11313 return SDValue();
11314
11315 // The inverted-condition + commuted-select variants of these patterns are
11316 // canonicalized to these forms in IR.
11317 SDValue X = Cond.getOperand(0);
11318 SDValue CondC = Cond.getOperand(1);
11319 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11320 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11321 isAllOnesOrAllOnesSplat(C2)) {
11322 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11323 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11324 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11325 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11326 }
11327 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11328 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11329 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11330 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11331 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11332 }
11333 return SDValue();
11334}
11335
11336 static bool shouldConvertSelectOfConstantsToMath(SDValue Cond, EVT VT,
11337 const TargetLowering &TLI) {
11338 if (!TLI.convertSelectOfConstantsToMath(VT))
11339 return false;
11340
11341 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11342 return true;
11344 return true;
11345
11346 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11347 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11348 return true;
11349 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11350 return true;
11351
11352 return false;
11353}
11354
11355SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11356 SDValue Cond = N->getOperand(0);
11357 SDValue N1 = N->getOperand(1);
11358 SDValue N2 = N->getOperand(2);
11359 EVT VT = N->getValueType(0);
11360 EVT CondVT = Cond.getValueType();
11361 SDLoc DL(N);
11362
11363 if (!VT.isInteger())
11364 return SDValue();
11365
11366 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11367 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11368 if (!C1 || !C2)
11369 return SDValue();
11370
11371 if (CondVT != MVT::i1 || LegalOperations) {
11372 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11373 // We can't do this reliably if integer-based booleans have different contents
11374 // from floating-point-based booleans. This is because we can't tell whether we
11375 // have an integer-based boolean or a floating-point-based boolean unless we
11376 // can find the SETCC that produced it and inspect its operands. This is
11377 // fairly easy if C is the SETCC node, but it can potentially be
11378 // undiscoverable (or not reasonably discoverable). For example, it could be
11379 // in another basic block or it could require searching a complicated
11380 // expression.
11381 if (CondVT.isInteger() &&
11382 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11383 TargetLowering::ZeroOrOneBooleanContent &&
11384 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11385 TargetLowering::ZeroOrOneBooleanContent &&
11386 C1->isZero() && C2->isOne()) {
11387 SDValue NotCond =
11388 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11389 if (VT.bitsEq(CondVT))
11390 return NotCond;
11391 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11392 }
11393
11394 return SDValue();
11395 }
11396
11397 // Only do this before legalization to avoid conflicting with target-specific
11398 // transforms in the other direction (create a select from a zext/sext). There
11399 // is also a target-independent combine here in DAGCombiner in the other
11400 // direction for (select Cond, -1, 0) when the condition is not i1.
11401 assert(CondVT == MVT::i1 && !LegalOperations);
11402
11403 // select Cond, 1, 0 --> zext (Cond)
11404 if (C1->isOne() && C2->isZero())
11405 return DAG.getZExtOrTrunc(Cond, DL, VT);
11406
11407 // select Cond, -1, 0 --> sext (Cond)
11408 if (C1->isAllOnes() && C2->isZero())
11409 return DAG.getSExtOrTrunc(Cond, DL, VT);
11410
11411 // select Cond, 0, 1 --> zext (!Cond)
11412 if (C1->isZero() && C2->isOne()) {
11413 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11414 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11415 return NotCond;
11416 }
11417
11418 // select Cond, 0, -1 --> sext (!Cond)
11419 if (C1->isZero() && C2->isAllOnes()) {
11420 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11421 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11422 return NotCond;
11423 }
11424
11425 // Use a target hook because some targets may prefer to transform in the
11426 // other direction.
11427 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11428 return SDValue();
11429
11430 // For any constants that differ by 1, we can transform the select into
11431 // an extend and add.
11432 const APInt &C1Val = C1->getAPIntValue();
11433 const APInt &C2Val = C2->getAPIntValue();
11434
11435 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
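// Illustrative example (editorial addition, not from the original source):
//   select Cond, 6, 5  -->  add (zext Cond), 5
// so only one of the two constants has to be materialized.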
11436 if (C1Val - 1 == C2Val) {
11437 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11438 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11439 }
11440
11441 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11442 if (C1Val + 1 == C2Val) {
11443 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11444 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11445 }
11446
11447 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11448 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11449 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11450 SDValue ShAmtC =
11451 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11452 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11453 }
11454
11455 // select Cond, -1, C --> or (sext Cond), C
11456 if (C1->isAllOnes()) {
11457 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11458 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11459 }
11460
11461 // select Cond, C, -1 --> or (sext (not Cond)), C
11462 if (C2->isAllOnes()) {
11463 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11464 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11465 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11466 }
11467
11468 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11469 return V;
11470
11471 return SDValue();
11472}
11473
11474template <class MatchContextClass>
11475 static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
11476 SelectionDAG &DAG) {
11477 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11478 N->getOpcode() == ISD::VP_SELECT) &&
11479 "Expected a (v)(vp.)select");
11480 SDValue Cond = N->getOperand(0);
11481 SDValue T = N->getOperand(1), F = N->getOperand(2);
11482 EVT VT = N->getValueType(0);
11483 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11484 MatchContextClass matcher(DAG, TLI, N);
11485
11486 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11487 return SDValue();
11488
11489 // select Cond, Cond, F --> or Cond, freeze(F)
11490 // select Cond, 1, F --> or Cond, freeze(F)
11491 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11492 return matcher.getNode(ISD::OR, DL, VT, Cond, DAG.getFreeze(F));
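// Editorial note on the freeze (an assumption about the rationale, not from
// the original source): when Cond is true the select never observes F, but
// or(Cond, F) would let a poison F leak through, so F is frozen first.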
11493
11494 // select Cond, T, Cond --> and Cond, freeze(T)
11495 // select Cond, T, 0 --> and Cond, freeze(T)
11496 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11497 return matcher.getNode(ISD::AND, DL, VT, Cond, DAG.getFreeze(T));
11498
11499 // select Cond, T, 1 --> or (not Cond), freeze(T)
11500 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11501 SDValue NotCond =
11502 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11503 return matcher.getNode(ISD::OR, DL, VT, NotCond, DAG.getFreeze(T));
11504 }
11505
11506 // select Cond, 0, F --> and (not Cond), freeze(F)
11507 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11508 SDValue NotCond =
11509 matcher.getNode(ISD::XOR, DL, VT, Cond, DAG.getAllOnesConstant(DL, VT));
11510 return matcher.getNode(ISD::AND, DL, VT, NotCond, DAG.getFreeze(F));
11511 }
11512
11513 return SDValue();
11514}
11515
11516 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11517 SDValue N0 = N->getOperand(0);
11518 SDValue N1 = N->getOperand(1);
11519 SDValue N2 = N->getOperand(2);
11520 EVT VT = N->getValueType(0);
11521
11522 SDValue Cond0, Cond1;
11523 ISD::CondCode CC;
11524 if (!sd_match(N0, m_OneUse(m_SetCC(m_Value(Cond0), m_Value(Cond1),
11525 m_CondCode(CC)))) ||
11526 VT != Cond0.getValueType())
11527 return SDValue();
11528
11529 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11530 // compare is inverted from that pattern ("Cond0 s> -1").
11531 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11532 ; // This is the pattern we are looking for.
11533 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11534 std::swap(N1, N2);
11535 else
11536 return SDValue();
11537
11538 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & freeze(N1)
11539 if (isNullOrNullSplat(N2)) {
11540 SDLoc DL(N);
11541 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11542 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11543 return DAG.getNode(ISD::AND, DL, VT, Sra, DAG.getFreeze(N1));
11544 }
11545
11546 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | freeze(N2)
11547 if (isAllOnesOrAllOnesSplat(N1)) {
11548 SDLoc DL(N);
11549 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11550 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11551 return DAG.getNode(ISD::OR, DL, VT, Sra, DAG.getFreeze(N2));
11552 }
11553
11554 // If we have to invert the sign bit mask, only do that transform if the
11555 // target has a bitwise 'and not' instruction (the invert is free).
11556 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & freeze(N2)
11557 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11558 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11559 SDLoc DL(N);
11560 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11561 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11562 SDValue Not = DAG.getNOT(DL, Sra, VT);
11563 return DAG.getNode(ISD::AND, DL, VT, Not, DAG.getFreeze(N2));
11564 }
11565
11566 // TODO: There's another pattern in this family, but it may require
11567 // implementing hasOrNot() to check for profitability:
11568 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | freeze(N2)
11569
11570 return SDValue();
11571}
11572
11573// Match SELECTs with absolute difference patterns.
11574// (select (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
11575// (select (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
11576// (select (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
11577// (select (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
11578SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
11579 SDValue False, ISD::CondCode CC,
11580 const SDLoc &DL) {
11581 bool IsSigned = isSignedIntSetCC(CC);
11582 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
11583 EVT VT = LHS.getValueType();
11584
11585 if (LegalOperations && !hasOperation(ABDOpc, VT))
11586 return SDValue();
11587
11588 switch (CC) {
11589 case ISD::SETGT:
11590 case ISD::SETGE:
11591 case ISD::SETUGT:
11592 case ISD::SETUGE:
11593 if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11594 sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
11595 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11596 break;
11597 case ISD::SETLT:
11598 case ISD::SETLE:
11599 case ISD::SETULT:
11600 case ISD::SETULE:
11601 if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11602 sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
11603 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11604 break;
11605 default:
11606 break;
11607 }
11608
11609 return SDValue();
11610}
11611
11612SDValue DAGCombiner::visitSELECT(SDNode *N) {
11613 SDValue N0 = N->getOperand(0);
11614 SDValue N1 = N->getOperand(1);
11615 SDValue N2 = N->getOperand(2);
11616 EVT VT = N->getValueType(0);
11617 EVT VT0 = N0.getValueType();
11618 SDLoc DL(N);
11619 SDNodeFlags Flags = N->getFlags();
11620
11621 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11622 return V;
11623
11624 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
11625 return V;
11626
11627 // select (not Cond), N1, N2 -> select Cond, N2, N1
11628 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11629 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11630 SelectOp->setFlags(Flags);
11631 return SelectOp;
11632 }
11633
11634 if (SDValue V = foldSelectOfConstants(N))
11635 return V;
11636
11637 // If we can fold this based on the true/false value, do so.
11638 if (SimplifySelectOps(N, N1, N2))
11639 return SDValue(N, 0); // Don't revisit N.
11640
11641 if (VT0 == MVT::i1) {
11642 // The code in this block deals with the following 2 equivalences:
11643 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11644 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11645 // The target can specify its preferred form with the
11646 // shouldNormalizeToSelectSequence() callback. However, we always transform
11647 // to the right form if the inner select already exists in the DAG, and we
11648 // always transform to the left side if we know that we can further
11649 // optimize the combination of the conditions.
11650 bool normalizeToSequence =
11651 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11652 // select (and Cond0, Cond1), X, Y
11653 // -> select Cond0, (select Cond1, X, Y), Y
11654 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11655 SDValue Cond0 = N0->getOperand(0);
11656 SDValue Cond1 = N0->getOperand(1);
11657 SDValue InnerSelect =
11658 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11659 if (normalizeToSequence || !InnerSelect.use_empty())
11660 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11661 InnerSelect, N2, Flags);
11662 // Cleanup on failure.
11663 if (InnerSelect.use_empty())
11664 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11665 }
11666 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11667 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11668 SDValue Cond0 = N0->getOperand(0);
11669 SDValue Cond1 = N0->getOperand(1);
11670 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11671 Cond1, N1, N2, Flags);
11672 if (normalizeToSequence || !InnerSelect.use_empty())
11673 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11674 InnerSelect, Flags);
11675 // Cleanup on failure.
11676 if (InnerSelect.use_empty())
11677 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11678 }
11679
11680 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11681 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11682 SDValue N1_0 = N1->getOperand(0);
11683 SDValue N1_1 = N1->getOperand(1);
11684 SDValue N1_2 = N1->getOperand(2);
11685 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11686 // Create the actual and node if we can generate good code for it.
11687 if (!normalizeToSequence) {
11688 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11689 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11690 N2, Flags);
11691 }
11692 // Otherwise see if we can optimize the "and" to a better pattern.
11693 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11694 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11695 N2, Flags);
11696 }
11697 }
11698 }
11699 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11700 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11701 SDValue N2_0 = N2->getOperand(0);
11702 SDValue N2_1 = N2->getOperand(1);
11703 SDValue N2_2 = N2->getOperand(2);
11704 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11705 // Create the actual or node if we can generate good code for it.
11706 if (!normalizeToSequence) {
11707 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11708 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11709 N2_2, Flags);
11710 }
11711 // Otherwise see if we can optimize to a better pattern.
11712 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11713 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11714 N2_2, Flags);
11715 }
11716 }
11717 }
11718
11719 // Fold selects based on a setcc into other things, such as min/max/abs.
11720 if (N0.getOpcode() == ISD::SETCC) {
11721 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11722 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11723
11724 // select (fcmp lt x, y), x, y -> fminnum x, y
11725 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11726 //
11727 // This is OK if we don't care what happens if either operand is a NaN.
11728 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, Flags, TLI))
11729 if (SDValue FMinMax =
11730 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11731 return FMinMax;
11732
11733 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11734 // This is conservatively limited to pre-legal-operations to give targets
11735 // a chance to reverse the transform if they want to do that. Also, it is
11736 // unlikely that the pattern would be formed late, so it's probably not
11737 // worth going through the other checks.
11738 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11739 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11740 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11741 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11742 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11743 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11744 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11745 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11746 //
11747 // The IR equivalent of this transform would have this form:
11748 // %a = add %x, C
11749 // %c = icmp ugt %x, ~C
11750 // %r = select %c, -1, %a
11751 // =>
11752 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11753 // %u0 = extractvalue %u, 0
11754 // %u1 = extractvalue %u, 1
11755 // %r = select %u1, -1, %u0
11756 SDVTList VTs = DAG.getVTList(VT, VT0);
11757 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11758 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11759 }
11760 }
11761
11762 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11763 (!LegalOperations &&
11764 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11765 // Any flags available in a select/setcc fold will be on the setcc as they
11766 // migrated from fcmp
11767 Flags = N0->getFlags();
11768 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11769 N2, N0.getOperand(2));
11770 SelectNode->setFlags(Flags);
11771 return SelectNode;
11772 }
11773
11774 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11775 return NewSel;
11776 }
11777
11778 if (!VT.isVector())
11779 if (SDValue BinOp = foldSelectOfBinops(N))
11780 return BinOp;
11781
11782 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11783 return R;
11784
11785 return SDValue();
11786}
11787
11788// This function assumes all the vselect's arguments are CONCAT_VECTOR
11789// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11790 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11791 SDLoc DL(N);
11792 SDValue Cond = N->getOperand(0);
11793 SDValue LHS = N->getOperand(1);
11794 SDValue RHS = N->getOperand(2);
11795 EVT VT = N->getValueType(0);
11796 int NumElems = VT.getVectorNumElements();
11797 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11798 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11799 Cond.getOpcode() == ISD::BUILD_VECTOR);
11800
11801 // CONCAT_VECTORS can take an arbitrary number of operands. We only care
11802 // about the binary case here.
11803 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11804 return SDValue();
11805
11806 // We're sure we have an even number of elements due to the
11807 // concat_vectors we have as arguments to vselect.
11808 // Skip BV elements until we find one that's not an UNDEF.
11809 // After we find a non-UNDEF element, keep looping until we get to half the
11810 // length of the BV and see if all the non-undef nodes are the same.
11811 ConstantSDNode *BottomHalf = nullptr;
11812 for (int i = 0; i < NumElems / 2; ++i) {
11813 if (Cond->getOperand(i)->isUndef())
11814 continue;
11815
11816 if (BottomHalf == nullptr)
11817 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11818 else if (Cond->getOperand(i).getNode() != BottomHalf)
11819 return SDValue();
11820 }
11821
11822 // Do the same for the second half of the BuildVector
11823 ConstantSDNode *TopHalf = nullptr;
11824 for (int i = NumElems / 2; i < NumElems; ++i) {
11825 if (Cond->getOperand(i)->isUndef())
11826 continue;
11827
11828 if (TopHalf == nullptr)
11829 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11830 else if (Cond->getOperand(i).getNode() != TopHalf)
11831 return SDValue();
11832 }
11833
11834 assert(TopHalf && BottomHalf &&
11835 "One half of the selector was all UNDEFs and the other was all the "
11836 "same value. This should have been addressed before this function.");
11837 return DAG.getNode(
11838 ISD::CONCAT_VECTORS, DL, VT,
11839 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11840 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11841}
11842
11843bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11844 SelectionDAG &DAG, const SDLoc &DL) {
11845
11846 // Only perform the transformation when existing operands can be reused.
11847 if (IndexIsScaled)
11848 return false;
11849
11850 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11851 return false;
11852
11853 EVT VT = BasePtr.getValueType();
11854
11855 if (SDValue SplatVal = DAG.getSplatValue(Index);
11856 SplatVal && !isNullConstant(SplatVal) &&
11857 SplatVal.getValueType() == VT) {
11858 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11859 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11860 return true;
11861 }
11862
11863 if (Index.getOpcode() != ISD::ADD)
11864 return false;
11865
11866 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11867 SplatVal && SplatVal.getValueType() == VT) {
11868 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11869 Index = Index.getOperand(1);
11870 return true;
11871 }
11872 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11873 SplatVal && SplatVal.getValueType() == VT) {
11874 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11875 Index = Index.getOperand(0);
11876 return true;
11877 }
11878 return false;
11879}
11880
11881// Fold sext/zext of index into index type.
11882 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11883 SelectionDAG &DAG) {
11884 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11885
11886 // It's always safe to look through zero extends.
11887 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11888 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11889 IndexType = ISD::UNSIGNED_SCALED;
11890 Index = Index.getOperand(0);
11891 return true;
11892 }
11893 if (ISD::isIndexTypeSigned(IndexType)) {
11894 IndexType = ISD::UNSIGNED_SCALED;
11895 return true;
11896 }
11897 }
11898
11899 // It's only safe to look through sign extends when Index is signed.
11900 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11901 ISD::isIndexTypeSigned(IndexType) &&
11902 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11903 Index = Index.getOperand(0);
11904 return true;
11905 }
11906
11907 return false;
11908}
11909
11910SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11911 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11912 SDValue Mask = MSC->getMask();
11913 SDValue Chain = MSC->getChain();
11914 SDValue Index = MSC->getIndex();
11915 SDValue Scale = MSC->getScale();
11916 SDValue StoreVal = MSC->getValue();
11917 SDValue BasePtr = MSC->getBasePtr();
11918 SDValue VL = MSC->getVectorLength();
11919 ISD::MemIndexType IndexType = MSC->getIndexType();
11920 SDLoc DL(N);
11921
11922 // Zap scatters with a zero mask.
11923 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11924 return Chain;
11925
11926 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11927 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11928 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11929 DL, Ops, MSC->getMemOperand(), IndexType);
11930 }
11931
11932 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11933 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11934 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11935 DL, Ops, MSC->getMemOperand(), IndexType);
11936 }
11937
11938 return SDValue();
11939}
11940
11941SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11942 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11943 SDValue Mask = MSC->getMask();
11944 SDValue Chain = MSC->getChain();
11945 SDValue Index = MSC->getIndex();
11946 SDValue Scale = MSC->getScale();
11947 SDValue StoreVal = MSC->getValue();
11948 SDValue BasePtr = MSC->getBasePtr();
11949 ISD::MemIndexType IndexType = MSC->getIndexType();
11950 SDLoc DL(N);
11951
11952 // Zap scatters with a zero mask.
11953 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11954 return Chain;
11955
11956 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11957 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11958 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11959 DL, Ops, MSC->getMemOperand(), IndexType,
11960 MSC->isTruncatingStore());
11961 }
11962
11963 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11964 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11965 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11966 DL, Ops, MSC->getMemOperand(), IndexType,
11967 MSC->isTruncatingStore());
11968 }
11969
11970 return SDValue();
11971}
11972
11973SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11974 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11975 SDValue Mask = MST->getMask();
11976 SDValue Chain = MST->getChain();
11977 SDValue Value = MST->getValue();
11978 SDValue Ptr = MST->getBasePtr();
11979 SDLoc DL(N);
11980
11981 // Zap masked stores with a zero mask.
11982 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11983 return Chain;
11984
11985 // Remove a masked store if base pointers and masks are equal.
11986 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11987 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11988 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11989 !MST->getBasePtr().isUndef() &&
11990 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11991 MST1->getMemoryVT().getStoreSize()) ||
11992 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11993 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11994 MST->getMemoryVT().getStoreSize())) {
11995 CombineTo(MST1, MST1->getChain());
11996 if (N->getOpcode() != ISD::DELETED_NODE)
11997 AddToWorklist(N);
11998 return SDValue(N, 0);
11999 }
12000 }
12001
12002 // If this is a masked store with an all-ones mask, we can use an unmasked store.
12003 // FIXME: Can we do this for indexed, compressing, or truncating stores?
12004 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
12005 !MST->isCompressingStore() && !MST->isTruncatingStore())
12006 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
12007 MST->getBasePtr(), MST->getPointerInfo(),
12008 MST->getOriginalAlign(),
12009 MST->getMemOperand()->getFlags(), MST->getAAInfo());
12010
12011 // Try transforming N to an indexed store.
12012 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12013 return SDValue(N, 0);
12014
12015 if (MST->isTruncatingStore() && MST->isUnindexed() &&
12016 Value.getValueType().isInteger() &&
12017 (!isa<ConstantSDNode>(Value) ||
12018 !cast<ConstantSDNode>(Value)->isOpaque())) {
12019 APInt TruncDemandedBits =
12020 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12021 MST->getMemoryVT().getScalarSizeInBits());
12022 
12023 // See if we can simplify the operation with
12024 // SimplifyDemandedBits, which only works if the value has a single use.
12025 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
12026 // Re-visit the store if anything changed and the store hasn't been merged
12027 // with another node (N is deleted). SimplifyDemandedBits will add Value's
12028 // node back to the worklist if necessary, but we also need to re-visit
12029 // the Store node itself.
12030 if (N->getOpcode() != ISD::DELETED_NODE)
12031 AddToWorklist(N);
12032 return SDValue(N, 0);
12033 }
12034 }
12035
12036 // If this is a TRUNC followed by a masked store, fold this into a masked
12037 // truncating store. We can do this even if this is already a masked
12038 // truncstore.
12039 // TODO: Try combining to a masked compress store if possible.
12040 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
12041 MST->isUnindexed() && !MST->isCompressingStore() &&
12042 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
12043 MST->getMemoryVT(), LegalOperations)) {
12044 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
12045 Value.getOperand(0).getValueType());
12046 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12047 MST->getOffset(), Mask, MST->getMemoryVT(),
12048 MST->getMemOperand(), MST->getAddressingMode(),
12049 /*IsTruncating=*/true);
12050 }
12051
12052 return SDValue();
12053}
12054
12055SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12056 auto *SST = cast<VPStridedStoreSDNode>(N);
12057 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12058 // Combine strided stores with unit-stride to a regular VP store.
12059 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12060 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12061 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12062 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12063 SST->getVectorLength(), SST->getMemoryVT(),
12064 SST->getMemOperand(), SST->getAddressingMode(),
12065 SST->isTruncatingStore(), SST->isCompressingStore());
12066 }
12067 return SDValue();
12068}
12069
12070SDValue DAGCombiner::visitVECTOR_COMPRESS(SDNode *N) {
12071 SDLoc DL(N);
12072 SDValue Vec = N->getOperand(0);
12073 SDValue Mask = N->getOperand(1);
12074 SDValue Passthru = N->getOperand(2);
12075 EVT VecVT = Vec.getValueType();
12076
12077 bool HasPassthru = !Passthru.isUndef();
12078
12079 APInt SplatVal;
12080 if (ISD::isConstantSplatVector(Mask.getNode(), SplatVal))
12081 return TLI.isConstTrueVal(Mask) ? Vec : Passthru;
12082
12083 if (Vec.isUndef() || Mask.isUndef())
12084 return Passthru;
12085
12086 // No need for potentially expensive compress if the mask is constant.
12089 EVT ScalarVT = VecVT.getVectorElementType();
12090 unsigned NumSelected = 0;
12091 unsigned NumElmts = VecVT.getVectorNumElements();
12092 for (unsigned I = 0; I < NumElmts; ++I) {
12093 SDValue MaskI = Mask.getOperand(I);
12094 // We treat undef mask entries as "false".
12095 if (MaskI.isUndef())
12096 continue;
12097
12098 if (TLI.isConstTrueVal(MaskI)) {
12099 SDValue VecI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec,
12100 DAG.getVectorIdxConstant(I, DL));
12101 Ops.push_back(VecI);
12102 NumSelected++;
12103 }
12104 }
12105 for (unsigned Rest = NumSelected; Rest < NumElmts; ++Rest) {
12106 SDValue Val =
12107 HasPassthru
12108 ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Passthru,
12109 DAG.getVectorIdxConstant(Rest, DL))
12110 : DAG.getUNDEF(ScalarVT);
12111 Ops.push_back(Val);
12112 }
12113 return DAG.getBuildVector(VecVT, DL, Ops);
12114 }
12115
12116 return SDValue();
12117}
12118
12119SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12120 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12121 SDValue Mask = MGT->getMask();
12122 SDValue Chain = MGT->getChain();
12123 SDValue Index = MGT->getIndex();
12124 SDValue Scale = MGT->getScale();
12125 SDValue BasePtr = MGT->getBasePtr();
12126 SDValue VL = MGT->getVectorLength();
12127 ISD::MemIndexType IndexType = MGT->getIndexType();
12128 SDLoc DL(N);
12129
12130 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12131 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12132 return DAG.getGatherVP(
12133 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12134 Ops, MGT->getMemOperand(), IndexType);
12135 }
12136
12137 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12138 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12139 return DAG.getGatherVP(
12140 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12141 Ops, MGT->getMemOperand(), IndexType);
12142 }
12143
12144 return SDValue();
12145}
12146
12147SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12148 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12149 SDValue Mask = MGT->getMask();
12150 SDValue Chain = MGT->getChain();
12151 SDValue Index = MGT->getIndex();
12152 SDValue Scale = MGT->getScale();
12153 SDValue PassThru = MGT->getPassThru();
12154 SDValue BasePtr = MGT->getBasePtr();
12155 ISD::MemIndexType IndexType = MGT->getIndexType();
12156 SDLoc DL(N);
12157
12158 // Zap gathers with a zero mask.
12159 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12160 return CombineTo(N, PassThru, MGT->getChain());
12161
12162 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12163 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12164 return DAG.getMaskedGather(
12165 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12166 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12167 }
12168
12169 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12170 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12171 return DAG.getMaskedGather(
12172 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12173 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12174 }
12175
12176 return SDValue();
12177}
12178
12179SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12180 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12181 SDValue Mask = MLD->getMask();
12182 SDLoc DL(N);
12183
12184 // Zap masked loads with a zero mask.
12185 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12186 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12187
12188 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12189 // FIXME: Can we do this for indexed, expanding, or extending loads?
12190 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12191 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12192 SDValue NewLd = DAG.getLoad(
12193 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12194 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12195 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12196 return CombineTo(N, NewLd, NewLd.getValue(1));
12197 }
12198
12199 // Try transforming N to an indexed load.
12200 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12201 return SDValue(N, 0);
12202
12203 return SDValue();
12204}
12205
12206SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12207 auto *SLD = cast<VPStridedLoadSDNode>(N);
12208 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12209 // Combine strided loads with unit-stride to a regular VP load.
12210 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12211 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12212 SDValue NewLd = DAG.getLoadVP(
12213 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12214 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12215 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12216 SLD->getMemOperand(), SLD->isExpandingLoad());
12217 return CombineTo(N, NewLd, NewLd.getValue(1));
12218 }
12219 return SDValue();
12220}
12221
12222/// A vector select of 2 constant vectors can be simplified to math/logic to
12223/// avoid a variable select instruction and possibly avoid constant loads.
12224SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12225 SDValue Cond = N->getOperand(0);
12226 SDValue N1 = N->getOperand(1);
12227 SDValue N2 = N->getOperand(2);
12228 EVT VT = N->getValueType(0);
12229 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12233 return SDValue();
12234
12235 // Check if we can use the condition value to increment/decrement a single
12236 // constant value. This simplifies a select to an add and removes a constant
12237 // load/materialization from the general case.
12238 bool AllAddOne = true;
12239 bool AllSubOne = true;
12240 unsigned Elts = VT.getVectorNumElements();
12241 for (unsigned i = 0; i != Elts; ++i) {
12242 SDValue N1Elt = N1.getOperand(i);
12243 SDValue N2Elt = N2.getOperand(i);
12244 if (N1Elt.isUndef() || N2Elt.isUndef())
12245 continue;
12246 if (N1Elt.getValueType() != N2Elt.getValueType()) {
12247 AllAddOne = false;
12248 AllSubOne = false;
12249 break;
12250 }
12251
12252 const APInt &C1 = N1Elt->getAsAPIntVal();
12253 const APInt &C2 = N2Elt->getAsAPIntVal();
12254 if (C1 != C2 + 1)
12255 AllAddOne = false;
12256 if (C1 != C2 - 1)
12257 AllSubOne = false;
12258 }
12259
12260 // Further simplifications for the extra-special cases where the constants are
12261 // all 0 or all -1 should be implemented as folds of these patterns.
12262 SDLoc DL(N);
12263 if (AllAddOne || AllSubOne) {
12264 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12265 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
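// Illustrative example (editorial addition, not from the original source):
//   vselect <4 x i1> %m, <i32 5, 5, 5, 5>, <i32 4, 4, 4, 4>
//     -->  add (zext %m to <4 x i32>), <i32 4, 4, 4, 4>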
12266 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12267 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12268 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12269 }
12270
12271 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12272 APInt Pow2C;
12273 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12274 isNullOrNullSplat(N2)) {
12275 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12276 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12277 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12278 }
12279
12280 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
12281 return V;
12282
12283 // The general case for select-of-constants:
12284 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12285 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12286 // leave that to a machine-specific pass.
12287 return SDValue();
12288}
12289
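/// Combine a VP_SELECT node: try the generic select simplification, then fold
/// selects of booleans into VP logic operations.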
12290SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12291 SDValue N0 = N->getOperand(0);
12292 SDValue N1 = N->getOperand(1);
12293 SDValue N2 = N->getOperand(2);
12294 SDLoc DL(N);
12295
12296 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12297 return V;
12298
12299 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DL, DAG))
12300 return V;
12301
12302 return SDValue();
12303}
12304
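/// Combine a VSELECT node. Among other folds, this handles generic select
/// simplification, condition flipping, integer abs matching, min/max and
/// saturating add/sub matching, widening of narrow setcc conditions, and
/// select-of-constants folds.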
12305SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12306 SDValue N0 = N->getOperand(0);
12307 SDValue N1 = N->getOperand(1);
12308 SDValue N2 = N->getOperand(2);
12309 EVT VT = N->getValueType(0);
12310 SDLoc DL(N);
12311
12312 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12313 return V;
12314
12315 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
12316 return V;
12317
12318 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12319 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12320 return DAG.getSelect(DL, VT, F, N2, N1);
12321
12322 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12323 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12324 DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1)) &&
12325 N0.getScalarValueSizeInBits() == N1.getScalarValueSizeInBits() &&
12326 TLI.getBooleanContents(N0.getValueType()) ==
12327 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12328 return DAG.getNode(
12329 ISD::ADD, DL, N1.getValueType(), N2,
12330 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12331 }
12332
12333 // Canonicalize integer abs.
12334 // vselect (setg[te] X, 0), X, -X ->
12335 // vselect (setgt X, -1), X, -X ->
12336 // vselect (setl[te] X, 0), -X, X ->
12337 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12338 if (N0.getOpcode() == ISD::SETCC) {
12339 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12340 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12341 bool isAbs = false;
12342 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12343
12344 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12345 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12346 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12347 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12348 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12349 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12350 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12351 
12352 if (isAbs) {
12353 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12354 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12355
12356 SDValue Shift = DAG.getNode(
12357 ISD::SRA, DL, VT, LHS,
12358 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, DL));
12359 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12360 AddToWorklist(Shift.getNode());
12361 AddToWorklist(Add.getNode());
12362 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12363 }
12364
12365 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12366 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12367 //
12368 // This is OK if we don't care about what happens if either operand is a
12369 // NaN.
12370 //
12371 if (N0.hasOneUse() &&
12372 isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, N->getFlags(), TLI)) {
12373 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12374 return FMinMax;
12375 }
12376
12377 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12378 return S;
12379 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12380 return S;
12381
12382 // If this select has a condition (setcc) with narrower operands than the
12383 // select, try to widen the compare to match the select width.
12384 // TODO: This should be extended to handle any constant.
12385 // TODO: This could be extended to handle non-loading patterns, but that
12386 // requires thorough testing to avoid regressions.
12387 if (isNullOrNullSplat(RHS)) {
12388 EVT NarrowVT = LHS.getValueType();
12389 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12390 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12391 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12392 unsigned WideWidth = WideVT.getScalarSizeInBits();
12393 bool IsSigned = isSignedIntSetCC(CC);
12394 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12395 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12396 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12397 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12398 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12399 // Both compare operands can be widened for free. The LHS can use an
12400 // extended load, and the RHS is a constant:
12401 // vselect (ext (setcc load(X), C)), N1, N2 -->
12402 // vselect (setcc extload(X), C'), N1, N2
12403 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12404 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12405 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12406 EVT WideSetCCVT = getSetCCResultType(WideVT);
12407 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12408 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12409 }
12410 }
12411
12412 if (SDValue ABD = foldSelectToABD(LHS, RHS, N1, N2, CC, DL))
12413 return ABD;
12414
12415 // Match VSELECTs into add with unsigned saturation.
12416 if (hasOperation(ISD::UADDSAT, VT)) {
12417 // Check if one of the arms of the VSELECT is a vector with all bits set.
12418 // If it's on the left side, invert the predicate to simplify the logic below.
12419 SDValue Other;
12420 ISD::CondCode SatCC = CC;
12421 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12422 Other = N2;
12423 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12424 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12425 Other = N1;
12426 }
12427
12428 if (Other && Other.getOpcode() == ISD::ADD) {
12429 SDValue CondLHS = LHS, CondRHS = RHS;
12430 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12431
12432 // Canonicalize condition operands.
12433 if (SatCC == ISD::SETUGE) {
12434 std::swap(CondLHS, CondRHS);
12435 SatCC = ISD::SETULE;
12436 }
12437
12438 // We can test against either of the addition operands.
12439 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12440 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12441 if (SatCC == ISD::SETULE && Other == CondRHS &&
12442 (OpLHS == CondLHS || OpRHS == CondLHS))
12443 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12444
12445 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12446 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12447 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12448 CondLHS == OpLHS) {
12449 // If the RHS is a constant we have to reverse the const
12450 // canonicalization.
12451 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12452 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12453 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12454 };
12455 if (SatCC == ISD::SETULE &&
12456 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12457 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12458 }
12459 }
12460 }
12461
12462 // Match VSELECTs into sub with unsigned saturation.
12463 if (hasOperation(ISD::USUBSAT, VT)) {
12464 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12465 // the left side, invert the predicate to simplify the logic below.
12466 SDValue Other;
12467 ISD::CondCode SatCC = CC;
12468 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12469 Other = N2;
12470 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12471 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12472 Other = N1;
12473 }
12474
12475 // zext(x) >= y ? trunc(zext(x) - y) : 0
12476 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12477 // zext(x) > y ? trunc(zext(x) - y) : 0
12478 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12479 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12480 Other.getOperand(0).getOpcode() == ISD::SUB &&
12481 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12482 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12483 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12484 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12485 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12486 DAG, DL))
12487 return R;
12488 }
12489
12490 if (Other && Other.getNumOperands() == 2) {
12491 SDValue CondRHS = RHS;
12492 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12493
12494 if (OpLHS == LHS) {
12495 // Look for a general sub with unsigned saturation first.
12496 // x >= y ? x-y : 0 --> usubsat x, y
12497 // x > y ? x-y : 0 --> usubsat x, y
12498 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12499 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12500 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12501
12502 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12503 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12504 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12505 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12506 // If the RHS is a constant we have to reverse the const
12507 // canonicalization.
12508 // x > C-1 ? x+-C : 0 --> usubsat x, C
12509 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12510 return (!Op && !Cond) ||
12511 (Op && Cond &&
12512 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12513 };
12514 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12515 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12516 /*AllowUndefs*/ true)) {
12517 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12518 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12519 }
12520
12521 // Another special case: If C was a sign bit, the sub has been
12522 // canonicalized into a xor.
12523 // FIXME: Would it be better to use computeKnownBits to
12524 // determine whether it's safe to decanonicalize the xor?
12525 // x s< 0 ? x^C : 0 --> usubsat x, C
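// For example, with i8 elements: x s< 0 ? x^0x80 : 0 --> usubsat x, 0x80.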
12526 APInt SplatValue;
12527 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12528 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12529 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12530 SplatValue.isSignMask()) {
12531 // Note that we have to rebuild the RHS constant here to
12532 // ensure we don't rely on particular values of undef lanes.
12533 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12534 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12535 }
12536 }
12537 }
12538 }
12539 }
12540 }
12541 }
12542
12543 if (SimplifySelectOps(N, N1, N2))
12544 return SDValue(N, 0); // Don't revisit N.
12545
12546 // Fold (vselect all_ones, N1, N2) -> N1
12547 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12548 return N1;
12549 // Fold (vselect all_zeros, N1, N2) -> N2
12550 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12551 return N2;
12552
12553 // The ConvertSelectToConcatVector function assumes both the above
12554 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12555 // and addressed.
12556 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12557 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12558 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12559 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12560 return CV;
12561 }
12562
12563 if (SDValue V = foldVSelectOfConstants(N))
12564 return V;
12565
12566 if (hasOperation(ISD::SRA, VT))
12567 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12568 return V;
12569 
12570 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12571 return SDValue(N, 0);
12572
12573 return SDValue();
12574}
12575
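/// Combine a SELECT_CC node: fold away trivial cases (equal true/false
/// operands, constant or undef conditions) and otherwise defer to
/// SimplifySelectCC for min/max/abs-style folds.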
12576SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12577 SDValue N0 = N->getOperand(0);
12578 SDValue N1 = N->getOperand(1);
12579 SDValue N2 = N->getOperand(2);
12580 SDValue N3 = N->getOperand(3);
12581 SDValue N4 = N->getOperand(4);
12582 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12583 SDLoc DL(N);
12584
12585 // fold select_cc lhs, rhs, x, x, cc -> x
12586 if (N2 == N3)
12587 return N2;
12588
12589 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12590 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12591 isNullConstant(N1))
12592 return DAG.getSelect(DL, N2.getValueType(), N0, N3, N2);
12593
12594 // Determine if the condition we're dealing with is constant
12595 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12596 CC, DL, false)) {
12597 AddToWorklist(SCC.getNode());
12598
12599 // cond always true -> true val
12600 // cond always false -> false val
12601 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12602 return SCCC->isZero() ? N3 : N2;
12603
12604 // When the condition is UNDEF, just return the first operand. This is
12605 // coherent with the DAG creation; no setcc node is created in this case.
12606 if (SCC->isUndef())
12607 return N2;
12608
12609 // Fold to a simpler select_cc
12610 if (SCC.getOpcode() == ISD::SETCC) {
12611 SDValue SelectOp =
12612 DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0),
12613 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12614 SelectOp->setFlags(SCC->getFlags());
12615 return SelectOp;
12616 }
12617 }
12618
12619 // If we can fold this based on the true/false value, do so.
12620 if (SimplifySelectOps(N, N2, N3))
12621 return SDValue(N, 0); // Don't revisit N.
12622
12623 // fold select_cc into other things, such as min/max/abs
12624 return SimplifySelectCC(DL, N0, N1, N2, N3, CC);
12625}
12626
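/// Combine a SETCC node: simplify the comparison itself, and rewrite
/// compare-with-shift/rotate patterns into the form the target prefers.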
12627SDValue DAGCombiner::visitSETCC(SDNode *N) {
12628 // setcc is very commonly used as an argument to brcond. This pattern
12629 // also lends itself to numerous combines and, as a result, it is desirable
12630 // to keep the argument to a brcond as a setcc as much as possible.
12631 bool PreferSetCC =
12632 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12633
12634 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12635 EVT VT = N->getValueType(0);
12636 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12637 SDLoc DL(N);
12638
12639 if (SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, DL, !PreferSetCC)) {
12640 // If we prefer to have a setcc, and we don't, we'll try our best to
12641 // recreate one using rebuildSetCC.
12642 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12643 SDValue NewSetCC = rebuildSetCC(Combined);
12644
12645 // We don't have anything interesting to combine to.
12646 if (NewSetCC.getNode() == N)
12647 return SDValue();
12648
12649 if (NewSetCC)
12650 return NewSetCC;
12651 }
12652 return Combined;
12653 }
12654
12655 // Optimize
12656 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12657 // or
12658 // 2) (icmp eq/ne X, (rotate X, C1))
12659 // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
12660 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12661 // Then:
12662 // If C1 is a power of 2, then the rotate and shift+and versions are
12663 // equivalent, so we can interchange them depending on target preference.
12664 // Otherwise, if we have the shift+and version, we can interchange srl/shl,
12665 // which in turn affects the constant C0. We can use this to get better
12666 // constants again, as determined by target preference.
12667 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12668 auto IsAndWithShift = [](SDValue A, SDValue B) {
12669 return A.getOpcode() == ISD::AND &&
12670 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12671 A.getOperand(0) == B.getOperand(0);
12672 };
12673 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12674 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12675 B.getOperand(0) == A;
12676 };
12677 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12678 bool IsRotate = false;
12679
12680 // Find either shift+and or rotate pattern.
12681 if (IsAndWithShift(N0, N1)) {
12682 AndOrOp = N0;
12683 ShiftOrRotate = N1;
12684 } else if (IsAndWithShift(N1, N0)) {
12685 AndOrOp = N1;
12686 ShiftOrRotate = N0;
12687 } else if (IsRotateWithOp(N0, N1)) {
12688 IsRotate = true;
12689 AndOrOp = N0;
12690 ShiftOrRotate = N1;
12691 } else if (IsRotateWithOp(N1, N0)) {
12692 IsRotate = true;
12693 AndOrOp = N1;
12694 ShiftOrRotate = N0;
12695 }
12696
12697 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12698 (IsRotate || AndOrOp.hasOneUse())) {
12699 EVT OpVT = N0.getValueType();
12700 // Get constant shift/rotate amount and possibly mask (if it's the
12701 // shift+and variant).
12702 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12703 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12704 /*AllowTrunc*/ false);
12705 if (CNode == nullptr)
12706 return std::nullopt;
12707 return CNode->getAPIntValue();
12708 };
12709 std::optional<APInt> AndCMask =
12710 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12711 std::optional<APInt> ShiftCAmt =
12712 GetAPIntValue(ShiftOrRotate.getOperand(1));
12713 unsigned NumBits = OpVT.getScalarSizeInBits();
12714
12715 // We found constants.
12716 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12717 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12718 // Check that the constants meet the constraints.
12719 bool CanTransform = IsRotate;
12720 if (!CanTransform) {
12721 // Check that the mask and shift complement each other.
12722 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12723 // Check that we are comparing all bits
12724 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12725 // Check that the and mask is correct for the shift
12726 CanTransform &=
12727 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12728 }
12729
12730 // See if target prefers another shift/rotate opcode.
12731 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12732 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12733 // Transform is valid and we have a new preference.
12734 if (CanTransform && NewShiftOpc != ShiftOpc) {
12735 SDValue NewShiftOrRotate =
12736 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12737 ShiftOrRotate.getOperand(1));
12738 SDValue NewAndOrOp = SDValue();
12739
12740 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12741 APInt NewMask =
12742 NewShiftOpc == ISD::SHL
12743 ? APInt::getHighBitsSet(NumBits,
12744 NumBits - ShiftCAmt->getZExtValue())
12745 : APInt::getLowBitsSet(NumBits,
12746 NumBits - ShiftCAmt->getZExtValue());
12747 NewAndOrOp =
12748 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12749 DAG.getConstant(NewMask, DL, OpVT));
12750 } else {
12751 NewAndOrOp = ShiftOrRotate.getOperand(0);
12752 }
12753
12754 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12755 }
12756 }
12757 }
12758 }
12759 return SDValue();
12760}
12761
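/// Combine a SETCCCARRY node: a known-zero carry input degenerates to a plain
/// SETCC.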
12762SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12763 SDValue LHS = N->getOperand(0);
12764 SDValue RHS = N->getOperand(1);
12765 SDValue Carry = N->getOperand(2);
12766 SDValue Cond = N->getOperand(3);
12767
12768 // If Carry is false, fold to a regular SETCC.
12769 if (isNullConstant(Carry))
12770 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12771
12772 return SDValue();
12773}
12774
12775 /// Check if N satisfies:
12776 ///   N is used once.
12777 ///   N is a Load.
12778 ///   The load is compatible with ExtOpcode. That is, if the load has an
12779 ///   explicit zero/sign extension, ExtOpcode must request the same
12780 ///   extension; otherwise any ExtOpcode is compatible.
12781 /// Returns true if all of the above hold.
12782static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12783 if (!N.hasOneUse())
12784 return false;
12785
12786 if (!isa<LoadSDNode>(N))
12787 return false;
12788
12789 LoadSDNode *Load = cast<LoadSDNode>(N);
12790 ISD::LoadExtType LoadExt = Load->getExtensionType();
12791 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12792 return true;
12793
12794 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12795 // extension.
12796 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12797 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12798 return false;
12799
12800 return true;
12801}
12802
12803/// Fold
12804/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12805/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12806/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12807/// This function is called by the DAGCombiner when visiting sext/zext/aext
12808/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12809 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12810 SelectionDAG &DAG, const SDLoc &DL,
12811 CombineLevel Level) {
12812 unsigned Opcode = N->getOpcode();
12813 SDValue N0 = N->getOperand(0);
12814 EVT VT = N->getValueType(0);
12815 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12816 Opcode == ISD::ANY_EXTEND) &&
12817 "Expected EXTEND dag node in input!");
12818
12819 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12820 !N0.hasOneUse())
12821 return SDValue();
12822
12823 SDValue Op1 = N0->getOperand(1);
12824 SDValue Op2 = N0->getOperand(2);
12825 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12826 return SDValue();
12827
12828 auto ExtLoadOpcode = ISD::EXTLOAD;
12829 if (Opcode == ISD::SIGN_EXTEND)
12830 ExtLoadOpcode = ISD::SEXTLOAD;
12831 else if (Opcode == ISD::ZERO_EXTEND)
12832 ExtLoadOpcode = ISD::ZEXTLOAD;
12833
12834 // An illegal VSELECT may make ISel fail if it appears after legalization
12835 // (DAG Combine2), so we conservatively check the OperationAction.
12836 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12837 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12838 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12839 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12840 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12841 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12842 return SDValue();
12843
12844 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12845 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12846 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12847}
12848
12849/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12850/// a build_vector of constants.
12851/// This function is called by the DAGCombiner when visiting sext/zext/aext
12852/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12853/// Vector extends are not folded if operations are legal; this is to
12854/// avoid introducing illegal build_vector dag nodes.
12855 static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
12856 const TargetLowering &TLI,
12857 SelectionDAG &DAG, bool LegalTypes) {
12858 unsigned Opcode = N->getOpcode();
12859 SDValue N0 = N->getOperand(0);
12860 EVT VT = N->getValueType(0);
12861
12862 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12863 "Expected EXTEND dag node in input!");
12864
12865 // fold (sext c1) -> c1
12866 // fold (zext c1) -> c1
12867 // fold (aext c1) -> c1
12868 if (isa<ConstantSDNode>(N0))
12869 return DAG.getNode(Opcode, DL, VT, N0);
12870
12871 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12872 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12873 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12874 if (N0->getOpcode() == ISD::SELECT) {
12875 SDValue Op1 = N0->getOperand(1);
12876 SDValue Op2 = N0->getOperand(2);
12877 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12878 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12879 // For any_extend, choose sign extension of the constants to allow a
12880 // possible further transform to sign_extend_inreg, i.e.:
12881 //
12882 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12883 // t2: i64 = any_extend t1
12884 // -->
12885 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12886 // -->
12887 // t4: i64 = sign_extend_inreg t3
12888 unsigned FoldOpc = Opcode;
12889 if (FoldOpc == ISD::ANY_EXTEND)
12890 FoldOpc = ISD::SIGN_EXTEND;
12891 return DAG.getSelect(DL, VT, N0->getOperand(0),
12892 DAG.getNode(FoldOpc, DL, VT, Op1),
12893 DAG.getNode(FoldOpc, DL, VT, Op2));
12894 }
12895 }
12896
12897 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
12898 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
12899 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
12900 EVT SVT = VT.getScalarType();
12901 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12902 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12903 return SDValue();
12904
12905 // We can fold this node into a build_vector.
12906 unsigned VTBits = SVT.getSizeInBits();
12907 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12908 SmallVector<SDValue, 8> Elts;
12909 unsigned NumElts = VT.getVectorNumElements();
12910
12911 for (unsigned i = 0; i != NumElts; ++i) {
12912 SDValue Op = N0.getOperand(i);
12913 if (Op.isUndef()) {
12914 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12915 Elts.push_back(DAG.getUNDEF(SVT));
12916 else
12917 Elts.push_back(DAG.getConstant(0, DL, SVT));
12918 continue;
12919 }
12920
12921 SDLoc DL(Op);
12922 // Get the constant value and if needed trunc it to the size of the type.
12923 // Nodes like build_vector might have constants wider than the scalar type.
12924 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12925 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12926 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12927 else
12928 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12929 }
12930
12931 return DAG.getBuildVector(VT, DL, Elts);
12932}
12933
12934 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
12935 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12936 // transformation. Returns true if the extensions are possible and the
12937 // above-mentioned transformation is profitable.
12938 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12939 unsigned ExtOpc,
12940 SmallVectorImpl<SDNode *> &ExtendNodes,
12941 const TargetLowering &TLI) {
12942 bool HasCopyToRegUses = false;
12943 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12944 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12945 ++UI) {
12946 SDNode *User = *UI;
12947 if (User == N)
12948 continue;
12949 if (UI.getUse().getResNo() != N0.getResNo())
12950 continue;
12951 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12952 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12953 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12954 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12955 // Sign bits will be lost after a zext.
12956 return false;
12957 bool Add = false;
12958 for (unsigned i = 0; i != 2; ++i) {
12959 SDValue UseOp = User->getOperand(i);
12960 if (UseOp == N0)
12961 continue;
12962 if (!isa<ConstantSDNode>(UseOp))
12963 return false;
12964 Add = true;
12965 }
12966 if (Add)
12967 ExtendNodes.push_back(User);
12968 continue;
12969 }
12970 // If truncates aren't free and there are users we can't
12971 // extend, it isn't worthwhile.
12972 if (!isTruncFree)
12973 return false;
12974 // Remember if this value is live-out.
12975 if (User->getOpcode() == ISD::CopyToReg)
12976 HasCopyToRegUses = true;
12977 }
12978
12979 if (HasCopyToRegUses) {
12980 bool BothLiveOut = false;
12981 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12982 UI != UE; ++UI) {
12983 SDUse &Use = UI.getUse();
12984 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12985 BothLiveOut = true;
12986 break;
12987 }
12988 }
12989 if (BothLiveOut)
12990 // Both unextended and extended values are live out. There had better be
12991 // a good reason for the transformation.
12992 return !ExtendNodes.empty();
12993 }
12994 return true;
12995}
12996
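/// Rewrite the SETCC nodes collected by ExtendUsesToFormExtLoad so that they
/// compare against the extended load value, extending their other operand to
/// match.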
12997void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12998 SDValue OrigLoad, SDValue ExtLoad,
12999 ISD::NodeType ExtType) {
13000 // Extend SetCC uses if necessary.
13001 SDLoc DL(ExtLoad);
13002 for (SDNode *SetCC : SetCCs) {
13003 SmallVector<SDValue, 4> Ops;
13004
13005 for (unsigned j = 0; j != 2; ++j) {
13006 SDValue SOp = SetCC->getOperand(j);
13007 if (SOp == OrigLoad)
13008 Ops.push_back(ExtLoad);
13009 else
13010 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
13011 }
13012
13013 Ops.push_back(SetCC->getOperand(2));
13014 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
13015 }
13016}
13017
13018// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
13019SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
13020 SDValue N0 = N->getOperand(0);
13021 EVT DstVT = N->getValueType(0);
13022 EVT SrcVT = N0.getValueType();
13023
13024 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13025 N->getOpcode() == ISD::ZERO_EXTEND) &&
13026 "Unexpected node type (not an extend)!");
13027
13028 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
13029 // For example, on a target with legal v4i32, but illegal v8i32, turn:
13030 // (v8i32 (sext (v8i16 (load x))))
13031 // into:
13032 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13033 // (v4i32 (sextload (x + 16)))))
13034 // Where uses of the original load, i.e.:
13035 // (v8i16 (load x))
13036 // are replaced with:
13037 // (v8i16 (truncate
13038 // (v8i32 (concat_vectors (v4i32 (sextload x)),
13039 // (v4i32 (sextload (x + 16)))))))
13040 //
13041 // This combine is only applicable to illegal, but splittable, vectors.
13042 // All legal types, and illegal non-vector types, are handled elsewhere.
13043 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
13044 //
13045 if (N0->getOpcode() != ISD::LOAD)
13046 return SDValue();
13047
13048 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13049
13050 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
13051 !N0.hasOneUse() || !LN0->isSimple() ||
13052 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
13053 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13054 return SDValue();
13055 
13056 SmallVector<SDNode *, 4> SetCCs;
13057 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
13058 return SDValue();
13059
13060 ISD::LoadExtType ExtType =
13061 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13062
13063 // Try to split the vector types to get down to legal types.
13064 EVT SplitSrcVT = SrcVT;
13065 EVT SplitDstVT = DstVT;
13066 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
13067 SplitSrcVT.getVectorNumElements() > 1) {
13068 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
13069 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
13070 }
13071
13072 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
13073 return SDValue();
13074
13075 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13076
13077 SDLoc DL(N);
13078 const unsigned NumSplits =
13079 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13080 const unsigned Stride = SplitSrcVT.getStoreSize();
13081 SmallVector<SDValue, 4> Loads;
13082 SmallVector<SDValue, 4> Chains;
13083
13084 SDValue BasePtr = LN0->getBasePtr();
13085 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13086 const unsigned Offset = Idx * Stride;
13087
13088 SDValue SplitLoad =
13089 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13090 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13091 SplitSrcVT, LN0->getOriginalAlign(),
13092 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13093
13094 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13095
13096 Loads.push_back(SplitLoad.getValue(0));
13097 Chains.push_back(SplitLoad.getValue(1));
13098 }
13099
13100 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13101 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13102
13103 // Simplify TF.
13104 AddToWorklist(NewChain.getNode());
13105
13106 CombineTo(N, NewValue);
13107
13108 // Replace uses of the original load (before extension)
13109 // with a truncate of the concatenated sextloaded vectors.
13110 SDValue Trunc =
13111 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13112 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13113 CombineTo(N0.getNode(), Trunc, NewChain);
13114 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13115}
13116
13117// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13118// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13119SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13120 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13121 EVT VT = N->getValueType(0);
13122 EVT OrigVT = N->getOperand(0).getValueType();
13123 if (TLI.isZExtFree(OrigVT, VT))
13124 return SDValue();
13125
13126 // and/or/xor
13127 SDValue N0 = N->getOperand(0);
13128 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13129 N0.getOperand(1).getOpcode() != ISD::Constant ||
13130 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13131 return SDValue();
13132
13133 // shl/shr
13134 SDValue N1 = N0->getOperand(0);
13135 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13136 N1.getOperand(1).getOpcode() != ISD::Constant ||
13137 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13138 return SDValue();
13139
13140 // load
13141 if (!isa<LoadSDNode>(N1.getOperand(0)))
13142 return SDValue();
13143 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13144 EVT MemVT = Load->getMemoryVT();
13145 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13146 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13147 return SDValue();
13148
13149
13150 // If the shift op is SHL, the logic op must be AND, otherwise the result
13151 // will be wrong.
13152 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13153 return SDValue();
13154
13155 if (!N0.hasOneUse() || !N1.hasOneUse())
13156 return SDValue();
13157
13158 SmallVector<SDNode *, 4> SetCCs;
13159 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13160 ISD::ZERO_EXTEND, SetCCs, TLI))
13161 return SDValue();
13162
13163 // Actually do the transformation.
13164 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13165 Load->getChain(), Load->getBasePtr(),
13166 Load->getMemoryVT(), Load->getMemOperand());
13167
13168 SDLoc DL1(N1);
13169 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13170 N1.getOperand(1));
13171
13172 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13173 SDLoc DL0(N0);
13174 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13175 DAG.getConstant(Mask, DL0, VT));
13176
13177 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13178 CombineTo(N, And);
13179 if (SDValue(Load, 0).hasOneUse()) {
13180 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13181 } else {
13182 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13183 Load->getValueType(0), ExtLoad);
13184 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13185 }
13186
13187 // N0 is dead at this point.
13188 recursivelyDeleteUnusedNodes(N0.getNode());
13189
13190 return SDValue(N,0); // Return N so it doesn't get rechecked!
13191}
13192
13193/// If we're narrowing or widening the result of a vector select and the final
13194/// size is the same size as a setcc (compare) feeding the select, then try to
13195/// apply the cast operation to the select's operands because matching vector
13196/// sizes for a select condition and other operands should be more efficient.
13197SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13198 unsigned CastOpcode = Cast->getOpcode();
13199 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13200 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13201 CastOpcode == ISD::FP_ROUND) &&
13202 "Unexpected opcode for vector select narrowing/widening");
13203
13204 // We only do this transform before legal ops because the pattern may be
13205 // obfuscated by target-specific operations after legalization. Do not create
13206 // an illegal select op, however, because that may be difficult to lower.
13207 EVT VT = Cast->getValueType(0);
13208 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13209 return SDValue();
13210
13211 SDValue VSel = Cast->getOperand(0);
13212 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13213 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13214 return SDValue();
13215
13216 // Does the setcc have the same vector size as the casted select?
13217 SDValue SetCC = VSel.getOperand(0);
13218 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13219 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13220 return SDValue();
13221
13222 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13223 SDValue A = VSel.getOperand(1);
13224 SDValue B = VSel.getOperand(2);
13225 SDValue CastA, CastB;
13226 SDLoc DL(Cast);
13227 if (CastOpcode == ISD::FP_ROUND) {
13228 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13229 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13230 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13231 } else {
13232 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13233 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13234 }
13235 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13236}
13237
13238// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13239// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13240 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13241 const TargetLowering &TLI, EVT VT,
13242 bool LegalOperations, SDNode *N,
13243 SDValue N0, ISD::LoadExtType ExtLoadType) {
13244 SDNode *N0Node = N0.getNode();
13245 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13246 : ISD::isZEXTLoad(N0Node);
13247 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13248 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13249 return SDValue();
13250
13251 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13252 EVT MemVT = LN0->getMemoryVT();
13253 if ((LegalOperations || !LN0->isSimple() ||
13254 VT.isVector()) &&
13255 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13256 return SDValue();
13257
13258 SDValue ExtLoad =
13259 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13260 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13261 Combiner.CombineTo(N, ExtLoad);
13262 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13263 if (LN0->use_empty())
13264 Combiner.recursivelyDeleteUnusedNodes(LN0);
13265 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13266}
13267
13268// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13269// Only generate vector extloads when 1) they're legal, and 2) they are
13270// deemed desirable by the target. NonNegZExt can be set to true if a zero
13271// extend has the nonneg flag to allow use of sextload if profitable.
13272 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13273 const TargetLowering &TLI, EVT VT,
13274 bool LegalOperations, SDNode *N, SDValue N0,
13275 ISD::LoadExtType ExtLoadType,
13276 ISD::NodeType ExtOpc,
13277 bool NonNegZExt = false) {
13278 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13279 return {};
13280
13281 // If this is zext nneg, see if it would make sense to treat it as a sext.
13282 if (NonNegZExt) {
13283 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13284 "Unexpected load type or opcode");
13285 for (SDNode *User : N0->uses()) {
13286 if (User->getOpcode() == ISD::SETCC) {
13287 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13288 if (ISD::isSignedIntSetCC(CC)) {
13289 ExtLoadType = ISD::SEXTLOAD;
13290 ExtOpc = ISD::SIGN_EXTEND;
13291 break;
13292 }
13293 }
13294 }
13295 }
13296
13297 // TODO: isFixedLengthVector() should be removed, with any negative effects
13298 // on code generation becoming the responsibility of that target's
13299 // implementation of isVectorLoadExtDesirable().
13300 if ((LegalOperations || VT.isFixedLengthVector() ||
13301 !cast<LoadSDNode>(N0)->isSimple()) &&
13302 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13303 return {};
13304
13305 bool DoXform = true;
13306 SmallVector<SDNode *, 4> SetCCs;
13307 if (!N0.hasOneUse())
13308 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13309 if (VT.isVector())
13310 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13311 if (!DoXform)
13312 return {};
13313
13314 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13315 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13316 LN0->getBasePtr(), N0.getValueType(),
13317 LN0->getMemOperand());
13318 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13319 // If the load value is used only by N, replace it via CombineTo N.
13320 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13321 Combiner.CombineTo(N, ExtLoad);
13322 if (NoReplaceTrunc) {
13323 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13324 Combiner.recursivelyDeleteUnusedNodes(LN0);
13325 } else {
13326 SDValue Trunc =
13327 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13328 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13329 }
13330 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13331}
13332
13333static SDValue
13334 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13335 bool LegalOperations, SDNode *N, SDValue N0,
13336 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13337 if (!N0.hasOneUse())
13338 return SDValue();
13339
13340 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13341 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13342 return SDValue();
13343
13344 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13345 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13346 return SDValue();
13347
13348 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13349 return SDValue();
13350
13351 SDLoc dl(Ld);
13352 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13353 SDValue NewLoad = DAG.getMaskedLoad(
13354 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13355 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13356 ExtLoadType, Ld->isExpandingLoad());
13357 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13358 return NewLoad;
13359}
13360
13361// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13362 static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13363 const TargetLowering &TLI, EVT VT,
13364 SDValue N0,
13365 ISD::LoadExtType ExtLoadType) {
13366 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13367 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13368 return {};
13369 EVT MemoryVT = ALoad->getMemoryVT();
13370 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13371 return {};
13372 // Can't fold into ALoad if it is already extending differently.
13373 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13374 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13375 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13376 return {};
13377
13378 EVT OrigVT = ALoad->getValueType(0);
13379 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13380 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13381 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13382 ALoad->getBasePtr(), ALoad->getMemOperand()));
13383 NewALoad->setExtensionType(ExtLoadType);
13384 DAG.ReplaceAllUsesOfValueWith(
13385 SDValue(ALoad, 0),
13386 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13387 // Update the chain uses.
13388 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13389 return SDValue(NewALoad, 0);
13390}
13391
13392 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13393 bool LegalOperations) {
13394 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13395 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13396
13397 SDValue SetCC = N->getOperand(0);
13398 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13399 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13400 return SDValue();
13401
13402 SDValue X = SetCC.getOperand(0);
13403 SDValue Ones = SetCC.getOperand(1);
13404 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13405 EVT VT = N->getValueType(0);
13406 EVT XVT = X.getValueType();
13407 // setge X, C is canonicalized to setgt, so we do not need to match that
13408 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13409 // not require the 'not' op.
13410 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13411 // Invert and smear/shift the sign bit:
13412 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13413 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13414 SDLoc DL(N);
13415 unsigned ShCt = VT.getSizeInBits() - 1;
13416 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13417 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13418 SDValue NotX = DAG.getNOT(DL, X, VT);
13419 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13420 auto ShiftOpcode =
13421 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13422 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13423 }
13424 }
13425 return SDValue();
13426}
13427
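/// Try to fold (sext (setcc x, y, cc)): produce a wide vector setcc directly
/// when the boolean contents allow it, widen the compare operands when that is
/// free, or lower to a select of constants.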
13428SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13429 SDValue N0 = N->getOperand(0);
13430 if (N0.getOpcode() != ISD::SETCC)
13431 return SDValue();
13432
13433 SDValue N00 = N0.getOperand(0);
13434 SDValue N01 = N0.getOperand(1);
13435 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13436 EVT VT = N->getValueType(0);
13437 EVT N00VT = N00.getValueType();
13438 SDLoc DL(N);
13439
13440 // Propagate fast-math-flags.
13441 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13442
13443 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13444 // the same size as the compared operands. Try to optimize sext(setcc())
13445 // if this is the case.
13446 if (VT.isVector() && !LegalOperations &&
13447 TLI.getBooleanContents(N00VT) ==
13448 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13449 EVT SVT = getSetCCResultType(N00VT);
13450
13451 // If we already have the desired type, don't change it.
13452 if (SVT != N0.getValueType()) {
13453 // We know that the # elements of the results is the same as the
13454 // # elements of the compare (and the # elements of the compare result
13455 // for that matter). Check to see that they are the same size. If so,
13456 // we know that the element size of the sext'd result matches the
13457 // element size of the compare operands.
13458 if (VT.getSizeInBits() == SVT.getSizeInBits())
13459 return DAG.getSetCC(DL, VT, N00, N01, CC);
13460
13461 // If the desired elements are smaller or larger than the source
13462 // elements, we can use a matching integer vector type and then
13463 // truncate/sign extend.
13464 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13465 if (SVT == MatchingVecType) {
13466 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13467 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13468 }
13469 }
13470
13471 // Try to eliminate the sext of a setcc by zexting the compare operands.
13472 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13473 !TLI.isOperationLegalOrCustom(ISD::SETCC, N00VT)) {
13474 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13475 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13476 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13477
13478 // We have an unsupported narrow vector compare op that would be legal
13479 // if extended to the destination type. See if the compare operands
13480 // can be freely extended to the destination type.
13481 auto IsFreeToExtend = [&](SDValue V) {
13482 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13483 return true;
13484 // Match a simple, non-extended load that can be converted to a
13485 // legal {z/s}ext-load.
13486 // TODO: Allow widening of an existing {z/s}ext-load?
13487 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13488 ISD::isUNINDEXEDLoad(V.getNode()) &&
13489 cast<LoadSDNode>(V)->isSimple() &&
13490 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13491 return false;
13492
13493 // Non-chain users of this value must either be the setcc in this
13494 // sequence or extends that can be folded into the new {z/s}ext-load.
13495 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13496 UI != UE; ++UI) {
13497 // Skip uses of the chain and the setcc.
13498 SDNode *User = *UI;
13499 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13500 continue;
13501 // Extra users must have exactly the same cast we are about to create.
13502 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13503 // is enhanced similarly.
13504 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13505 return false;
13506 }
13507 return true;
13508 };
13509
13510 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13511 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13512 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13513 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13514 }
13515 }
13516 }
13517
13518 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13519 // Here, T can be 1 or -1, depending on the type of the setcc and
13520 // getBooleanContents().
13521 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13522
13523 // To determine the "true" side of the select, we need to know the high bit
13524 // of the value returned by the setcc if it evaluates to true.
13525 // If the type of the setcc is i1, then the true case of the select is just
13526 // sext(i1 1), that is, -1.
13527 // If the type of the setcc is larger (say, i8) then the value of the high
13528 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13529 // of the appropriate width.
13530 SDValue ExtTrueVal = (SetCCWidth == 1)
13531 ? DAG.getAllOnesConstant(DL, VT)
13532 : DAG.getBoolConstant(true, DL, VT, N00VT);
13533 SDValue Zero = DAG.getConstant(0, DL, VT);
13534 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13535 return SCC;
13536
13537 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13538 EVT SetCCVT = getSetCCResultType(N00VT);
13539 // Don't do this transform for i1 because there's a select transform
13540 // that would reverse it.
13541 // TODO: We should not do this transform at all without a target hook
13542 // because a sext is likely cheaper than a select?
13543 if (SetCCVT.getScalarSizeInBits() != 1 &&
13544 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13545 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13546 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13547 }
13548 }
13549
13550 return SDValue();
13551}
13552
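/// Combine a SIGN_EXTEND node: fold extends of constants, collapse chains of
/// extends and truncates, form sign-extending (including masked and atomic)
/// loads, and simplify extends of setcc results.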
13553SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13554 SDValue N0 = N->getOperand(0);
13555 EVT VT = N->getValueType(0);
13556 SDLoc DL(N);
13557
13558 if (VT.isVector())
13559 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13560 return FoldedVOp;
13561
13562 // sext(undef) = 0 because the top bit will all be the same.
13563 if (N0.isUndef())
13564 return DAG.getConstant(0, DL, VT);
13565
13566 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13567 return Res;
13568
13569 // fold (sext (sext x)) -> (sext x)
13570 // fold (sext (aext x)) -> (sext x)
13571 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13572 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13573
13574 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13575 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13576 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13577 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13578 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13579 N0.getOperand(0));
13580
13581 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13582 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13583 SDValue N00 = N0.getOperand(0);
13584 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13585 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13586 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13587 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13588 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13589 }
13590 }
13591
13592 if (N0.getOpcode() == ISD::TRUNCATE) {
13593 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13594 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13595 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13596 SDNode *oye = N0.getOperand(0).getNode();
13597 if (NarrowLoad.getNode() != N0.getNode()) {
13598 CombineTo(N0.getNode(), NarrowLoad);
13599 // CombineTo deleted the truncate, if needed, but not what's under it.
13600 AddToWorklist(oye);
13601 }
13602 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13603 }
13604
13605 // See if the value being truncated is already sign extended. If so, just
13606 // eliminate the trunc/sext pair.
13607 SDValue Op = N0.getOperand(0);
13608 unsigned OpBits = Op.getScalarValueSizeInBits();
13609 unsigned MidBits = N0.getScalarValueSizeInBits();
13610 unsigned DestBits = VT.getScalarSizeInBits();
13611 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13612
13613 if (OpBits == DestBits) {
13614 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13615 // bits, it is already ready.
13616 if (NumSignBits > DestBits-MidBits)
13617 return Op;
13618 } else if (OpBits < DestBits) {
13619 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13620 // bits, just sext from i32.
13621 if (NumSignBits > OpBits-MidBits)
13622 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13623 } else {
13624 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13625 // bits, just truncate to i32.
13626 if (NumSignBits > OpBits-MidBits)
13627 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13628 }
13629
13630 // fold (sext (truncate x)) -> (sextinreg x).
13631 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13632 N0.getValueType())) {
13633 if (OpBits < DestBits)
13634 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13635 else if (OpBits > DestBits)
13636 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13637 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13638 DAG.getValueType(N0.getValueType()));
13639 }
13640 }
13641
13642 // Try to simplify (sext (load x)).
13643 if (SDValue foldedExt =
13644 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13645 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13646 return foldedExt;
13647
13648 if (SDValue foldedExt =
13649 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13650 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13651 return foldedExt;
13652
13653 // fold (sext (load x)) to multiple smaller sextloads.
13654 // Only on illegal but splittable vectors.
13655 if (SDValue ExtLoad = CombineExtLoad(N))
13656 return ExtLoad;
13657
13658 // Try to simplify (sext (sextload x)).
13659 if (SDValue foldedExt = tryToFoldExtOfExtload(
13660 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13661 return foldedExt;
13662
13663 // Try to simplify (sext (atomic_load x)).
13664 if (SDValue foldedExt =
13665 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13666 return foldedExt;
13667
13668 // fold (sext (and/or/xor (load x), cst)) ->
13669 // (and/or/xor (sextload x), (sext cst))
13670 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13671 isa<LoadSDNode>(N0.getOperand(0)) &&
13672 N0.getOperand(1).getOpcode() == ISD::Constant &&
13673 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13674 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13675 EVT MemVT = LN00->getMemoryVT();
13676 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13677 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13678 SmallVector<SDNode *, 4> SetCCs;
13679 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13680 ISD::SIGN_EXTEND, SetCCs, TLI);
13681 if (DoXform) {
13682 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13683 LN00->getChain(), LN00->getBasePtr(),
13684 LN00->getMemoryVT(),
13685 LN00->getMemOperand());
13686 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13687 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13688 ExtLoad, DAG.getConstant(Mask, DL, VT));
13689 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13690 bool NoReplaceTruncAnd = !N0.hasOneUse();
13691 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13692 CombineTo(N, And);
13693 // If N0 has multiple uses, change other uses as well.
13694 if (NoReplaceTruncAnd) {
13695 SDValue TruncAnd =
13696 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), And);
13697 CombineTo(N0.getNode(), TruncAnd);
13698 }
13699 if (NoReplaceTrunc) {
13700 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13701 } else {
13702 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13703 LN00->getValueType(0), ExtLoad);
13704 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13705 }
13706 return SDValue(N,0); // Return N so it doesn't get rechecked!
13707 }
13708 }
13709 }
13710
13711 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13712 return V;
13713
13714 if (SDValue V = foldSextSetcc(N))
13715 return V;
13716
13717 // fold (sext x) -> (zext x) if the sign bit is known zero.
13718 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13719 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13720 DAG.SignBitIsZero(N0)) {
13721 SDNodeFlags Flags;
13722 Flags.setNonNeg(true);
13723 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13724 }
13725
13726 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13727 return NewVSel;
13728
13729 // Eliminate this sign extend by doing a negation in the destination type:
13730 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13731 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13735 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13736 return DAG.getNegative(Zext, DL, VT);
13737 }
13738 // Eliminate this sign extend by doing a decrement in the destination type:
13739 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13740 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13744 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13745 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13746 }
13747
13748 // fold sext (not i1 X) -> add (zext i1 X), -1
13749 // TODO: This could be extended to handle bool vectors.
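// For an i1 X both forms agree: X=0 gives (not X)=1, whose sext is -1, matching
// (zext 0) + (-1); X=1 gives (not X)=0, whose sext is 0, matching (zext 1) + (-1).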
13750 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13751 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13752 TLI.isOperationLegal(ISD::ADD, VT)))) {
13753 // If we can eliminate the 'not', the sext form should be better
13754 if (SDValue NewXor = visitXOR(N0.getNode())) {
13755 // Returning N0 is a form of in-visit replacement that may have
13756 // invalidated N0.
13757 if (NewXor.getNode() == N0.getNode()) {
13758 // Return SDValue here as the xor should have already been replaced in
13759 // this sext.
13760 return SDValue();
13761 }
13762
13763 // Return a new sext with the new xor.
13764 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13765 }
13766
13767 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13768 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13769 }
13770
13771 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
13772 return Res;
13773
13774 return SDValue();
13775}
13776
13777/// Given an extending node with a pop-count operand, if the target does not
13778/// support a pop-count in the narrow source type but does support it in the
13779/// destination type, widen the pop-count to the destination type.
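/// For example, (i64 (zext (ctpop (i32 X)))) can become (ctpop (i64 (zext X)))
/// on a target that supports i64 CTPOP but not i32 CTPOP.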
13780static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL) {
13781 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13782 Extend->getOpcode() == ISD::ANY_EXTEND) &&
13783 "Expected extend op");
13784
13785 SDValue CtPop = Extend->getOperand(0);
13786 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13787 return SDValue();
13788
13789 EVT VT = Extend->getValueType(0);
13790 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13793 return SDValue();
13794
13795 // zext (ctpop X) --> ctpop (zext X)
13796 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13797 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13798}
13799
13800// If we have (zext (abs X)) where X is a type that will be promoted by type
13801// legalization, convert to (abs (sext X)). But don't extend past a legal type.
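// For example, with i16 promoted to i32:
//   (i64 (zext (abs (i16 X)))) -> (i64 (zext (i32 (abs (sext i16 X to i32)))))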
13802static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13803 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13804
13805 EVT VT = Extend->getValueType(0);
13806 if (VT.isVector())
13807 return SDValue();
13808
13809 SDValue Abs = Extend->getOperand(0);
13810 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13811 return SDValue();
13812
13813 EVT AbsVT = Abs.getValueType();
13814 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13815 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13817 return SDValue();
13818
13819 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13820
13821 SDValue SExt =
13822 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13823 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13824 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13825}
13826
13827SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13828 SDValue N0 = N->getOperand(0);
13829 EVT VT = N->getValueType(0);
13830 SDLoc DL(N);
13831
13832 if (VT.isVector())
13833 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13834 return FoldedVOp;
13835
13836 // zext(undef) = 0
13837 if (N0.isUndef())
13838 return DAG.getConstant(0, DL, VT);
13839
13840 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13841 return Res;
13842
13843 // fold (zext (zext x)) -> (zext x)
13844 // fold (zext (aext x)) -> (zext x)
13845 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13846 SDNodeFlags Flags;
13847 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13848 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13849 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13850 }
13851
13852 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13853 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13856 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, N0.getOperand(0));
13857
13858 // fold (zext (truncate x)) -> (zext x) or
13859 // (zext (truncate x)) -> (truncate x)
13860 // This is valid when the truncated bits of x are already zero.
13861 SDValue Op;
13862 KnownBits Known;
13863 if (isTruncateOf(DAG, N0, Op, Known)) {
13864 APInt TruncatedBits =
13865 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13866 APInt(Op.getScalarValueSizeInBits(), 0) :
13867 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13869 std::min(Op.getScalarValueSizeInBits(),
13870 VT.getScalarSizeInBits()));
13871 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13872 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13873 DAG.salvageDebugInfo(*N0.getNode());
13874
13875 return ZExtOrTrunc;
13876 }
13877 }
13878
13879 // fold (zext (truncate x)) -> (and x, mask)
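// e.g. (i32 (zext (i8 (trunc (i32 X))))) -> (i32 (and X, 255)), keeping only
// the low 8 bits that survive the truncate.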
13880 if (N0.getOpcode() == ISD::TRUNCATE) {
13881 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13882 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13883 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13884 SDNode *oye = N0.getOperand(0).getNode();
13885 if (NarrowLoad.getNode() != N0.getNode()) {
13886 CombineTo(N0.getNode(), NarrowLoad);
13887 // CombineTo deleted the truncate, if needed, but not what's under it.
13888 AddToWorklist(oye);
13889 }
13890 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13891 }
13892
13893 EVT SrcVT = N0.getOperand(0).getValueType();
13894 EVT MinVT = N0.getValueType();
13895
13896 if (N->getFlags().hasNonNeg()) {
13897 SDValue Op = N0.getOperand(0);
13898 unsigned OpBits = SrcVT.getScalarSizeInBits();
13899 unsigned MidBits = MinVT.getScalarSizeInBits();
13900 unsigned DestBits = VT.getScalarSizeInBits();
13901 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13902
13903 if (OpBits == DestBits) {
13904 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13905 // bits, it already has the value we need.
13906 if (NumSignBits > DestBits - MidBits)
13907 return Op;
13908 } else if (OpBits < DestBits) {
13909 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13910 // bits, just sext from i32.
13911 // FIXME: This can probably be ZERO_EXTEND nneg?
13912 if (NumSignBits > OpBits - MidBits)
13913 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13914 } else {
13915 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13916 // bits, just truncate to i32.
13917 if (NumSignBits > OpBits - MidBits)
13918 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13919 }
13920 }
13921
13922 // Try to mask before the extension to avoid having to generate a larger mask,
13923 // possibly over several sub-vectors.
13924 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13925 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13927 SDValue Op = N0.getOperand(0);
13928 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13929 AddToWorklist(Op.getNode());
13930 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13931 // Transfer the debug info; the new node is equivalent to N0.
13932 DAG.transferDbgValues(N0, ZExtOrTrunc);
13933 return ZExtOrTrunc;
13934 }
13935 }
13936
13937 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13938 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13939 AddToWorklist(Op.getNode());
13940 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13941 // We may safely transfer the debug info describing the truncate node over
13942 // to the equivalent and operation.
13943 DAG.transferDbgValues(N0, And);
13944 return And;
13945 }
13946 }
13947
13948 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13949 // if either of the casts is not free.
13950 if (N0.getOpcode() == ISD::AND &&
13951 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13952 N0.getOperand(1).getOpcode() == ISD::Constant &&
13953 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13954 !TLI.isZExtFree(N0.getValueType(), VT))) {
13955 SDValue X = N0.getOperand(0).getOperand(0);
13956 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13958 return DAG.getNode(ISD::AND, DL, VT,
13959 X, DAG.getConstant(Mask, DL, VT));
13960 }
13961
13962 // Try to simplify (zext (load x)).
13963 if (SDValue foldedExt = tryToFoldExtOfLoad(
13964 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13965 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13966 return foldedExt;
13967
13968 if (SDValue foldedExt =
13969 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13971 return foldedExt;
13972
13973 // fold (zext (load x)) to multiple smaller zextloads.
13974 // Only on illegal but splittable vectors.
13975 if (SDValue ExtLoad = CombineExtLoad(N))
13976 return ExtLoad;
13977
13978 // Try to simplify (zext (atomic_load x)).
13979 if (SDValue foldedExt =
13980 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13981 return foldedExt;
13982
13983 // fold (zext (and/or/xor (load x), cst)) ->
13984 // (and/or/xor (zextload x), (zext cst))
13985 // Unless (and (load x) cst) will match as a zextload already and has
13986 // additional users, or the zext is already free.
13987 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13988 isa<LoadSDNode>(N0.getOperand(0)) &&
13989 N0.getOperand(1).getOpcode() == ISD::Constant &&
13990 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13991 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13992 EVT MemVT = LN00->getMemoryVT();
13993 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13994 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13995 bool DoXform = true;
13996 SmallVector<SDNode *, 4> SetCCs;
13997 if (!N0.hasOneUse()) {
13998 if (N0.getOpcode() == ISD::AND) {
13999 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
14000 EVT LoadResultTy = AndC->getValueType(0);
14001 EVT ExtVT;
14002 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
14003 DoXform = false;
14004 }
14005 }
14006 if (DoXform)
14007 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
14008 ISD::ZERO_EXTEND, SetCCs, TLI);
14009 if (DoXform) {
14010 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
14011 LN00->getChain(), LN00->getBasePtr(),
14012 LN00->getMemoryVT(),
14013 LN00->getMemOperand());
14014 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
14015 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
14016 ExtLoad, DAG.getConstant(Mask, DL, VT));
14017 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
14018 bool NoReplaceTruncAnd = !N0.hasOneUse();
14019 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
14020 CombineTo(N, And);
14021 // If N0 has multiple uses, change other uses as well.
14022 if (NoReplaceTruncAnd) {
14023 SDValue TruncAnd =
14024 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
14025 CombineTo(N0.getNode(), TruncAnd);
14026 }
14027 if (NoReplaceTrunc) {
14028 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
14029 } else {
14030 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
14031 LN00->getValueType(0), ExtLoad);
14032 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
14033 }
14034 return SDValue(N,0); // Return N so it doesn't get rechecked!
14035 }
14036 }
14037 }
14038
14039 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
14040 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
14041 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
14042 return ZExtLoad;
14043
14044 // Try to simplify (zext (zextload x)).
14045 if (SDValue foldedExt = tryToFoldExtOfExtload(
14046 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
14047 return foldedExt;
14048
14049 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
14050 return V;
14051
14052 if (N0.getOpcode() == ISD::SETCC) {
14053 // Propagate fast-math-flags.
14054 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14055
14056 // Only do this before legalize for now.
14057 if (!LegalOperations && VT.isVector() &&
14058 N0.getValueType().getVectorElementType() == MVT::i1) {
14059 EVT N00VT = N0.getOperand(0).getValueType();
14060 if (getSetCCResultType(N00VT) == N0.getValueType())
14061 return SDValue();
14062
14063 // We know that the # elements of the results is the same as the #
14064 // elements of the compare (and the # elements of the compare result for
14065 // that matter). Check to see that they are the same size. If so, we know
14066 // that the element size of the sext'd result matches the element size of
14067 // the compare operands.
14068 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
14069 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
14070 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
14071 N0.getOperand(1), N0.getOperand(2));
14072 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14073 }
14074
14075 // If the desired elements are smaller or larger than the source
14076 // elements we can use a matching integer vector type and then
14077 // truncate/any extend followed by zext_in_reg.
14078 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14079 SDValue VsetCC =
14080 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14081 N0.getOperand(1), N0.getOperand(2));
14082 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14083 N0.getValueType());
14084 }
14085
14086 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14087 EVT N0VT = N0.getValueType();
14088 EVT N00VT = N0.getOperand(0).getValueType();
14089 if (SDValue SCC = SimplifySelectCC(
14090 DL, N0.getOperand(0), N0.getOperand(1),
14091 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14092 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14093 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14094 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14095 }
14096
14097 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
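// e.g. (i64 (zext (shl (i32 (zext i16 X)), 4))) becomes a shl of the i64-zexted
// shift operand, provided no set bits can be shifted out of the i32 value.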
14098 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14099 !TLI.isZExtFree(N0, VT)) {
14100 SDValue ShVal = N0.getOperand(0);
14101 SDValue ShAmt = N0.getOperand(1);
14102 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14103 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14104 if (N0.getOpcode() == ISD::SHL) {
14105 // If the original shl may be shifting out bits, do not perform this
14106 // transformation.
14107 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14108 ShVal.getOperand(0).getValueSizeInBits();
14109 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14110 // If the shift is too large, then see if we can deduce that the
14111 // shift is safe anyway.
14112 // Create a mask that has ones for the bits being shifted out.
14113 APInt ShiftOutMask =
14114 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
14115 ShAmtC->getAPIntValue().getZExtValue());
14116
14117 // Check if the bits being shifted out are known to be zero.
14118 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
14119 return SDValue();
14120 }
14121 }
14122
14123 // Ensure that the shift amount is wide enough for the shifted value.
14124 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14125 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14126
14127 return DAG.getNode(N0.getOpcode(), DL, VT,
14128 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14129 }
14130 }
14131 }
14132
14133 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14134 return NewVSel;
14135
14136 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14137 return NewCtPop;
14138
14139 if (SDValue V = widenAbs(N, DAG))
14140 return V;
14141
14142 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14143 return Res;
14144
14145 // CSE zext nneg with sext if the zext is not free.
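// A zext with the nneg flag yields the same value as a sext of the same
// operand, so if an equivalent sign_extend node already exists, reuse it.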
14146 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14147 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14148 if (CSENode)
14149 return SDValue(CSENode, 0);
14150 }
14151
14152 return SDValue();
14153}
14154
14155SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14156 SDValue N0 = N->getOperand(0);
14157 EVT VT = N->getValueType(0);
14158 SDLoc DL(N);
14159
14160 // aext(undef) = undef
14161 if (N0.isUndef())
14162 return DAG.getUNDEF(VT);
14163
14164 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14165 return Res;
14166
14167 // fold (aext (aext x)) -> (aext x)
14168 // fold (aext (zext x)) -> (zext x)
14169 // fold (aext (sext x)) -> (sext x)
14170 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14171 N0.getOpcode() == ISD::SIGN_EXTEND) {
14172 SDNodeFlags Flags;
14173 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14174 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14175 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14176 }
14177
14178 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14179 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14180 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14184 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14185
14186 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14187 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14188 if (N0.getOpcode() == ISD::TRUNCATE) {
14189 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14190 SDNode *oye = N0.getOperand(0).getNode();
14191 if (NarrowLoad.getNode() != N0.getNode()) {
14192 CombineTo(N0.getNode(), NarrowLoad);
14193 // CombineTo deleted the truncate, if needed, but not what's under it.
14194 AddToWorklist(oye);
14195 }
14196 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14197 }
14198 }
14199
14200 // fold (aext (truncate x))
14201 if (N0.getOpcode() == ISD::TRUNCATE)
14202 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14203
14204 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14205 // if the trunc is not free.
14206 if (N0.getOpcode() == ISD::AND &&
14207 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14208 N0.getOperand(1).getOpcode() == ISD::Constant &&
14209 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14210 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14211 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14212 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14213 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14214 }
14215
14216 // fold (aext (load x)) -> (aext (truncate (extload x)))
14217 // None of the supported targets knows how to perform load and any_ext
14218 // on vectors in one instruction, so attempt to fold to zext instead.
14219 if (VT.isVector()) {
14220 // Try to simplify (zext (load x)).
14221 if (SDValue foldedExt =
14222 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14224 return foldedExt;
14225 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14227 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14228 bool DoXform = true;
14230 if (!N0.hasOneUse())
14231 DoXform =
14232 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14233 if (DoXform) {
14234 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14235 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14236 LN0->getBasePtr(), N0.getValueType(),
14237 LN0->getMemOperand());
14238 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14239 // If the load value is used only by N, replace it via CombineTo N.
14240 bool NoReplaceTrunc = N0.hasOneUse();
14241 CombineTo(N, ExtLoad);
14242 if (NoReplaceTrunc) {
14243 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14244 recursivelyDeleteUnusedNodes(LN0);
14245 } else {
14246 SDValue Trunc =
14247 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14248 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14249 }
14250 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14251 }
14252 }
14253
14254 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14255 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14256 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14257 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14258 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14259 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14260 ISD::LoadExtType ExtType = LN0->getExtensionType();
14261 EVT MemVT = LN0->getMemoryVT();
14262 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14263 SDValue ExtLoad =
14264 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14265 MemVT, LN0->getMemOperand());
14266 CombineTo(N, ExtLoad);
14267 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14268 recursivelyDeleteUnusedNodes(LN0);
14269 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14270 }
14271 }
14272
14273 if (N0.getOpcode() == ISD::SETCC) {
14274 // Propagate fast-math-flags.
14275 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14276
14277 // For vectors:
14278 // aext(setcc) -> vsetcc
14279 // aext(setcc) -> truncate(vsetcc)
14280 // aext(setcc) -> aext(vsetcc)
14281 // Only do this before legalize for now.
14282 if (VT.isVector() && !LegalOperations) {
14283 EVT N00VT = N0.getOperand(0).getValueType();
14284 if (getSetCCResultType(N00VT) == N0.getValueType())
14285 return SDValue();
14286
14287 // We know that the # elements of the results is the same as the
14288 // # elements of the compare (and the # elements of the compare result
14289 // for that matter). Check to see that they are the same size. If so,
14290 // we know that the element size of the sext'd result matches the
14291 // element size of the compare operands.
14292 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14293 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14294 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14295
14296 // If the desired elements are smaller or larger than the source
14297 // elements we can use a matching integer vector type and then
14298 // truncate/any extend
14299 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14300 SDValue VsetCC = DAG.getSetCC(
14301 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14302 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14303 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14304 }
14305
14306 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14307 if (SDValue SCC = SimplifySelectCC(
14308 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14309 DAG.getConstant(0, DL, VT),
14310 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14311 return SCC;
14312 }
14313
14314 if (SDValue NewCtPop = widenCtPop(N, DAG, DL))
14315 return NewCtPop;
14316
14317 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, DL, Level))
14318 return Res;
14319
14320 return SDValue();
14321}
14322
14323SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14324 unsigned Opcode = N->getOpcode();
14325 SDValue N0 = N->getOperand(0);
14326 SDValue N1 = N->getOperand(1);
14327 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14328
14329 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14330 if (N0.getOpcode() == Opcode &&
14331 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14332 return N0;
14333
14334 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14335 N0.getOperand(0).getOpcode() == Opcode) {
14336 // We have an assert, truncate, assert sandwich. Make one stronger assert
14337 // by asserting on the smallest asserted type to the larger source type.
14338 // This eliminates the later assert:
14339 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14340 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14341 SDLoc DL(N);
14342 SDValue BigA = N0.getOperand(0);
14343 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14344 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14345 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14346 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14347 BigA.getOperand(0), MinAssertVTVal);
14348 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14349 }
14350
14351 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14352 // than X, just move the AssertZext in front of the truncate and drop the
14353 // AssertSExt.
14354 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14356 Opcode == ISD::AssertZext) {
14357 SDValue BigA = N0.getOperand(0);
14358 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14359 if (AssertVT.bitsLT(BigA_AssertVT)) {
14360 SDLoc DL(N);
14361 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14362 BigA.getOperand(0), N1);
14363 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14364 }
14365 }
14366
14367 return SDValue();
14368}
14369
14370SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14371 SDLoc DL(N);
14372
14373 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14374 SDValue N0 = N->getOperand(0);
14375
14376 // Fold (assertalign (assertalign x, AL0), AL1) ->
14377 // (assertalign x, max(AL0, AL1))
14378 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14379 return DAG.getAssertAlign(DL, N0.getOperand(0),
14380 std::max(AL, AAN->getAlign()));
14381
14382 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14383 // this assert down to source operands so that those arithmetic ops could be
14384 // exposed to the DAG combining.
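// e.g. (assertalign (add X, 32), align 16): the constant already provides the
// required alignment, so this becomes (add (assertalign X, align 16), 32).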
14385 switch (N0.getOpcode()) {
14386 default:
14387 break;
14388 case ISD::ADD:
14389 case ISD::SUB: {
14390 unsigned AlignShift = Log2(AL);
14391 SDValue LHS = N0.getOperand(0);
14392 SDValue RHS = N0.getOperand(1);
14393 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14394 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14395 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14396 if (LHSAlignShift < AlignShift)
14397 LHS = DAG.getAssertAlign(DL, LHS, AL);
14398 if (RHSAlignShift < AlignShift)
14399 RHS = DAG.getAssertAlign(DL, RHS, AL);
14400 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14401 }
14402 break;
14403 }
14404 }
14405
14406 return SDValue();
14407}
14408
14409/// If the result of a load is shifted/masked/truncated to an effectively
14410/// narrower type, try to transform the load to a narrower type and/or
14411/// use an extending load.
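/// For example, (i32 (truncate (i64 (srl (load X), 32)))) can become an i32
/// load of just the high half, read from X plus a 4-byte offset on
/// little-endian targets.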
14412SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14413 unsigned Opc = N->getOpcode();
14414
14415 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14416 SDValue N0 = N->getOperand(0);
14417 EVT VT = N->getValueType(0);
14418 EVT ExtVT = VT;
14419
14420 // This transformation isn't valid for vector loads.
14421 if (VT.isVector())
14422 return SDValue();
14423
14424 // The ShAmt variable is used to indicate that we've consumed a right
14425 // shift. I.e. we want to narrow the width of the load by skipping to load the
14426 // ShAmt least significant bits.
14427 unsigned ShAmt = 0;
14428 // A special case is when the least significant bits from the load are masked
14429 // away, but using an AND rather than a right shift. ShiftedOffset is used
14430 // to indicate how many bits the narrowed load must be shifted left to get
14431 // the result.
14432 unsigned ShiftedOffset = 0;
14433 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14434 // extended to VT.
14435 if (Opc == ISD::SIGN_EXTEND_INREG) {
14436 ExtType = ISD::SEXTLOAD;
14437 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14438 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14439 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14440 // value, or it may be shifting a higher subword, half or byte into the
14441 // lowest bits.
14442
14443 // Only handle shift with constant shift amount, and the shiftee must be a
14444 // load.
14445 auto *LN = dyn_cast<LoadSDNode>(N0);
14446 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14447 if (!N1C || !LN)
14448 return SDValue();
14449 // If the shift amount is larger than the memory type then we're not
14450 // accessing any of the loaded bytes.
14451 ShAmt = N1C->getZExtValue();
14452 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14453 if (MemoryWidth <= ShAmt)
14454 return SDValue();
14455 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14456 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14457 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14458 // If original load is a SEXTLOAD then we can't simply replace it by a
14459 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14460 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14461 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14462 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14463 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14464 LN->getExtensionType() != ExtType)
14465 return SDValue();
14466 } else if (Opc == ISD::AND) {
14467 // An AND with a constant mask is the same as a truncate + zero-extend.
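// e.g. (and (load i32 X), 255) only keeps the low byte, so it can be
// rewritten as an i8 zero-extending load.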
14468 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14469 if (!AndC)
14470 return SDValue();
14471
14472 const APInt &Mask = AndC->getAPIntValue();
14473 unsigned ActiveBits = 0;
14474 if (Mask.isMask()) {
14475 ActiveBits = Mask.countr_one();
14476 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14477 ShiftedOffset = ShAmt;
14478 } else {
14479 return SDValue();
14480 }
14481
14482 ExtType = ISD::ZEXTLOAD;
14483 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14484 }
14485
14486 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14487 // a right shift. Here we redo some of those checks, to possibly adjust the
14488 // ExtVT even further based on "a masking AND". We could also end up here for
14489 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14490 // need to be done here as well.
14491 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14492 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14493 // Bail out when the SRL has more than one use. This is done for historical
14494 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14495 // check below? And maybe it could be non-profitable to do the transform in
14496 // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14497 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case.
14498 if (!SRL.hasOneUse())
14499 return SDValue();
14500
14501 // Only handle shift with constant shift amount, and the shiftee must be a
14502 // load.
14503 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14504 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14505 if (!SRL1C || !LN)
14506 return SDValue();
14507
14508 // If the shift amount is larger than the input type then we're not
14509 // accessing any of the loaded bytes. If the load was a zextload/extload
14510 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14511 ShAmt = SRL1C->getZExtValue();
14512 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14513 if (ShAmt >= MemoryWidth)
14514 return SDValue();
14515
14516 // Because a SRL must be assumed to *need* to zero-extend the high bits
14517 // (as opposed to anyext the high bits), we can't combine the zextload
14518 // lowering of SRL and an sextload.
14519 if (LN->getExtensionType() == ISD::SEXTLOAD)
14520 return SDValue();
14521
14522 // Avoid reading outside the memory accessed by the original load (could
14523 // happen if we only adjust the load base pointer by ShAmt). Instead we
14524 // try to narrow the load even further. The typical scenario here is:
14525 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14526 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14527 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14528 // Don't replace sextload by zextload.
14529 if (ExtType == ISD::SEXTLOAD)
14530 return SDValue();
14531 // Narrow the load.
14532 ExtType = ISD::ZEXTLOAD;
14533 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14534 }
14535
14536 // If the SRL is only used by a masking AND, we may be able to adjust
14537 // the ExtVT to make the AND redundant.
14538 SDNode *Mask = *(SRL->use_begin());
14539 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14540 isa<ConstantSDNode>(Mask->getOperand(1))) {
14541 unsigned Offset, ActiveBits;
14542 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14543 if (ShiftMask.isMask()) {
14544 EVT MaskedVT =
14545 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14546 // If the mask is smaller, recompute the type.
14547 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14548 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14549 ExtVT = MaskedVT;
14550 } else if (ExtType == ISD::ZEXTLOAD &&
14551 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14552 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14553 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14554 // If the mask is shifted we can use a narrower load and a shl to insert
14555 // the trailing zeros.
14556 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14557 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14558 ExtVT = MaskedVT;
14559 ShAmt = Offset + ShAmt;
14560 ShiftedOffset = Offset;
14561 }
14562 }
14563 }
14564
14565 N0 = SRL.getOperand(0);
14566 }
14567
14568 // If the load is shifted left (and the result isn't shifted back right), we
14569 // can fold a truncate through the shift. The typical scenario is that N
14570 // points at a TRUNCATE here so the attempted fold is:
14571 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14572 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14573 unsigned ShLeftAmt = 0;
14574 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14575 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14576 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14577 ShLeftAmt = N01->getZExtValue();
14578 N0 = N0.getOperand(0);
14579 }
14580 }
14581
14582 // If we haven't found a load, we can't narrow it.
14583 if (!isa<LoadSDNode>(N0))
14584 return SDValue();
14585
14586 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14587 // Reducing the width of a volatile load is illegal. For atomics, we may be
14588 // able to reduce the width provided we never widen again. (see D66309)
14589 if (!LN0->isSimple() ||
14590 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14591 return SDValue();
14592
14593 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14594 unsigned LVTStoreBits =
14596 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14597 return LVTStoreBits - EVTStoreBits - ShAmt;
14598 };
14599
14600 // We need to adjust the pointer to the load by ShAmt bits in order to load
14601 // the correct bytes.
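// e.g. narrowing an i32 load to i16 with ShAmt == 16 reads from X+2 on
// little-endian targets and from X+0 on big-endian targets.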
14602 unsigned PtrAdjustmentInBits =
14603 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14604
14605 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14606 SDLoc DL(LN0);
14607 // The original load itself didn't wrap, so an offset within it doesn't.
14608 SDNodeFlags Flags;
14609 Flags.setNoUnsignedWrap(true);
14610 SDValue NewPtr = DAG.getMemBasePlusOffset(
14611 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14612 AddToWorklist(NewPtr.getNode());
14613
14614 SDValue Load;
14615 if (ExtType == ISD::NON_EXTLOAD)
14616 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14617 LN0->getPointerInfo().getWithOffset(PtrOff),
14618 LN0->getOriginalAlign(),
14619 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14620 else
14621 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14622 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14623 LN0->getOriginalAlign(),
14624 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14625
14626 // Replace the old load's chain with the new load's chain.
14627 WorklistRemover DeadNodes(*this);
14628 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14629
14630 // Shift the result left, if we've swallowed a left shift.
14631 SDValue Result = Load;
14632 if (ShLeftAmt != 0) {
14633 // If the shift amount is as large as the result size (but, presumably,
14634 // no larger than the source) then the useful bits of the result are
14635 // zero; we can't simply return the shortened shift, because the result
14636 // of that operation is undefined.
14637 if (ShLeftAmt >= VT.getScalarSizeInBits())
14638 Result = DAG.getConstant(0, DL, VT);
14639 else
14640 Result = DAG.getNode(ISD::SHL, DL, VT, Result,
14641 DAG.getShiftAmountConstant(ShLeftAmt, VT, DL));
14642 }
14643
14644 if (ShiftedOffset != 0) {
14645 // We're using a shifted mask, so the load now has an offset. This means
14646 // that data has been loaded into lower bytes than it would have been
14647 // before, so we need to shl the loaded data into the correct position in the
14648 // register.
14649 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14650 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14651 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14652 }
14653
14654 // Return the new loaded value.
14655 return Result;
14656}
14657
14658SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14659 SDValue N0 = N->getOperand(0);
14660 SDValue N1 = N->getOperand(1);
14661 EVT VT = N->getValueType(0);
14662 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14663 unsigned VTBits = VT.getScalarSizeInBits();
14664 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14665
14666 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14667 if (N0.isUndef())
14668 return DAG.getConstant(0, SDLoc(N), VT);
14669
14670 // fold (sext_in_reg c1) -> c1
14672 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14673
14674 // If the input is already sign extended, just drop the extension.
14675 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14676 return N0;
14677
14678 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14679 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14680 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14681 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14682 N1);
14683
14684 // fold (sext_in_reg (sext x)) -> (sext x)
14685 // fold (sext_in_reg (aext x)) -> (sext x)
14686 // if x is small enough or if we know that x has more than 1 sign bit and the
14687 // sign_extend_inreg is extending from one of them.
14688 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14689 SDValue N00 = N0.getOperand(0);
14690 unsigned N00Bits = N00.getScalarValueSizeInBits();
14691 if ((N00Bits <= ExtVTBits ||
14692 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14693 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14694 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14695 }
14696
14697 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14698 // if x is small enough or if we know that x has more than 1 sign bit and the
14699 // sign_extend_inreg is extending from one of them.
14701 SDValue N00 = N0.getOperand(0);
14702 unsigned N00Bits = N00.getScalarValueSizeInBits();
14703 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14704 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14705 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14706 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14707 if ((N00Bits == ExtVTBits ||
14708 (!IsZext && (N00Bits < ExtVTBits ||
14709 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14710 (!LegalOperations ||
14712 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14713 }
14714
14715 // fold (sext_in_reg (zext x)) -> (sext x)
14716 // iff we are extending the source sign bit.
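// e.g. (sext_in_reg (zext i8 X to i32), i8) -> (sext i8 X to i32)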
14717 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14718 SDValue N00 = N0.getOperand(0);
14719 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14720 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14721 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14722 }
14723
14724 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14725 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14726 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14727
14728 // fold operands of sext_in_reg based on knowledge that the top bits are not
14729 // demanded.
14730 if (SimplifyDemandedBits(SDValue(N, 0)))
14731 return SDValue(N, 0);
14732
14733 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14734 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14735 if (SDValue NarrowLoad = reduceLoadWidth(N))
14736 return NarrowLoad;
14737
14738 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14739 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14740 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14741 if (N0.getOpcode() == ISD::SRL) {
14742 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14743 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14744 // We can turn this into an SRA iff the input to the SRL is already sign
14745 // extended enough.
14746 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14747 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14748 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14749 N0.getOperand(1));
14750 }
14751 }
14752
14753 // fold (sext_inreg (extload x)) -> (sextload x)
14754 // If sextload is not supported by target, we can only do the combine when
14755 // load has one use. Doing otherwise can block folding the extload with other
14756 // extends that the target does support.
14757 if (ISD::isEXTLoad(N0.getNode()) &&
14759 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14760 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14761 N0.hasOneUse()) ||
14762 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14763 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14764 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14765 LN0->getChain(),
14766 LN0->getBasePtr(), ExtVT,
14767 LN0->getMemOperand());
14768 CombineTo(N, ExtLoad);
14769 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14770 AddToWorklist(ExtLoad.getNode());
14771 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14772 }
14773
14774 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14776 N0.hasOneUse() &&
14777 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14778 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14779 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14780 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14781 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14782 LN0->getChain(),
14783 LN0->getBasePtr(), ExtVT,
14784 LN0->getMemOperand());
14785 CombineTo(N, ExtLoad);
14786 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14787 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14788 }
14789
14790 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14791 // ignore it if the masked load is already sign extended
14792 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14793 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14794 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14795 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14796 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14797 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14798 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14799 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14800 CombineTo(N, ExtMaskedLoad);
14801 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14802 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14803 }
14804 }
14805
14806 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14807 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14808 if (SDValue(GN0, 0).hasOneUse() &&
14809 ExtVT == GN0->getMemoryVT() &&
14811 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14812 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14813
14814 SDValue ExtLoad = DAG.getMaskedGather(
14815 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14816 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14817
14818 CombineTo(N, ExtLoad);
14819 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14820 AddToWorklist(ExtLoad.getNode());
14821 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14822 }
14823 }
14824
14825 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14826 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14827 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14828 N0.getOperand(1), false))
14829 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14830 }
14831
14832 // Fold (iM_signext_inreg
14833 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14834 // from iN)
14835 // -> (extract_subvector (signext iN_v to iM))
14836 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14838 SDValue InnerExt = N0.getOperand(0);
14839 EVT InnerExtVT = InnerExt->getValueType(0);
14840 SDValue Extendee = InnerExt->getOperand(0);
14841
14842 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14843 (!LegalOperations ||
14844 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14845 SDValue SignExtExtendee =
14846 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14847 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14848 N0.getOperand(1));
14849 }
14850 }
14851
14852 return SDValue();
14853}
14854
14856 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14857 bool LegalOperations) {
14858 unsigned InregOpcode = N->getOpcode();
14859 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14860
14861 SDValue Src = N->getOperand(0);
14862 EVT VT = N->getValueType(0);
14863 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14864 Src.getValueType().getVectorElementType(),
14866
14867 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14868 "Expected EXTEND_VECTOR_INREG dag node in input!");
14869
14870 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14871 // FIXME: one-use check may be overly restrictive
14872 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14873 return SDValue();
14874
14875 // Profitability check: we must be extending exactly one of its operands.
14876 // FIXME: this is probably overly restrictive.
14877 Src = Src.getOperand(0);
14878 if (Src.getValueType() != SrcVT)
14879 return SDValue();
14880
14881 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14882 return SDValue();
14883
14884 return DAG.getNode(Opcode, DL, VT, Src);
14885}
14886
14887SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14888 SDValue N0 = N->getOperand(0);
14889 EVT VT = N->getValueType(0);
14890 SDLoc DL(N);
14891
14892 if (N0.isUndef()) {
14893 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14894 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14895 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14896 ? DAG.getUNDEF(VT)
14897 : DAG.getConstant(0, DL, VT);
14898 }
14899
14900 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14901 return Res;
14902
14904 return SDValue(N, 0);
14905
14907 LegalOperations))
14908 return R;
14909
14910 return SDValue();
14911}
14912
14913SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14914 SDValue N0 = N->getOperand(0);
14915 EVT VT = N->getValueType(0);
14916 EVT SrcVT = N0.getValueType();
14917 bool isLE = DAG.getDataLayout().isLittleEndian();
14918 SDLoc DL(N);
14919
14920 // trunc(undef) = undef
14921 if (N0.isUndef())
14922 return DAG.getUNDEF(VT);
14923
14924 // fold (truncate (truncate x)) -> (truncate x)
14925 if (N0.getOpcode() == ISD::TRUNCATE)
14926 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14927
14928 // fold (truncate c1) -> c1
14929 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14930 return C;
14931
14932 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14933 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14934 N0.getOpcode() == ISD::SIGN_EXTEND ||
14935 N0.getOpcode() == ISD::ANY_EXTEND) {
14936 // if the source is smaller than the dest, we still need an extend.
14937 if (N0.getOperand(0).getValueType().bitsLT(VT))
14938 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14939 // if the source is larger than the dest, then we just need the truncate.
14940 if (N0.getOperand(0).getValueType().bitsGT(VT))
14941 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14942 // if the source and dest are the same type, we can drop both the extend
14943 // and the truncate.
14944 return N0.getOperand(0);
14945 }
14946
14947 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14948 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14949 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14950 N0.hasOneUse()) {
14951 SDValue X = N0.getOperand(0);
14952 SDValue ExtVal = N0.getOperand(1);
14953 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14954 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14955 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14956 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14957 }
14958 }
14959
14960 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14961 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14962 return SDValue();
14963
14964 // Fold extract-and-trunc into a narrow extract. For example:
14965 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14966 // i32 y = TRUNCATE(i64 x)
14967 // -- becomes --
14968 // v16i8 b = BITCAST (v2i64 val)
14969 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14970 //
14971 // Note: We only run this optimization after type legalization (which often
14972 // creates this pattern) and before operation legalization after which
14973 // we need to be more careful about the vector instructions that we generate.
14974 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14975 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14976 EVT VecTy = N0.getOperand(0).getValueType();
14977 EVT ExTy = N0.getValueType();
14978 EVT TrTy = N->getValueType(0);
14979
14980 auto EltCnt = VecTy.getVectorElementCount();
14981 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14982 auto NewEltCnt = EltCnt * SizeRatio;
14983
14984 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14985 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14986
14987 SDValue EltNo = N0->getOperand(1);
14988 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14989 int Elt = EltNo->getAsZExtVal();
14990 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14991 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14992 DAG.getBitcast(NVT, N0.getOperand(0)),
14994 }
14995 }
14996
14997 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14998 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14999 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
15000 TLI.isTruncateFree(SrcVT, VT)) {
15001 SDLoc SL(N0);
15002 SDValue Cond = N0.getOperand(0);
15003 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
15004 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
15005 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
15006 }
15007 }
15008
15009 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
15010 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
15011 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
15012 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
15013 SDValue Amt = N0.getOperand(1);
15014 KnownBits Known = DAG.computeKnownBits(Amt);
15015 unsigned Size = VT.getScalarSizeInBits();
15016 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
15017 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
15018 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15019 if (AmtVT != Amt.getValueType()) {
15020 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
15021 AddToWorklist(Amt.getNode());
15022 }
15023 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
15024 }
15025 }
15026
15027 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
15028 return V;
15029
15030 if (SDValue ABD = foldABSToABD(N, DL))
15031 return ABD;
15032
15033 // Attempt to pre-truncate BUILD_VECTOR sources.
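// e.g. truncating (v4i32 build_vector a, b, c, d) to v4i16 becomes a v4i16
// build_vector of the individually truncated scalar operands.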
15034 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
15035 N0.hasOneUse() &&
15036 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
15037 // Avoid creating illegal types if running after type legalizer.
15038 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
15039 EVT SVT = VT.getScalarType();
15040 SmallVector<SDValue, 8> TruncOps;
15041 for (const SDValue &Op : N0->op_values()) {
15042 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
15043 TruncOps.push_back(TruncOp);
15044 }
15045 return DAG.getBuildVector(VT, DL, TruncOps);
15046 }
15047
15048 // trunc (splat_vector x) -> splat_vector (trunc x)
15049 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
15050 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
15051 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
15052 EVT SVT = VT.getScalarType();
15053 return DAG.getSplatVector(
15054 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
15055 }
15056
15057 // Fold a series of buildvector, bitcast, and truncate if possible.
15058 // For example fold
15059 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
15060 // (2xi32 (buildvector x, y)).
15061 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
15062 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
15064 N0.getOperand(0).hasOneUse()) {
15065 SDValue BuildVect = N0.getOperand(0);
15066 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
15067 EVT TruncVecEltTy = VT.getVectorElementType();
15068
15069 // Check that the element types match.
15070 if (BuildVectEltTy == TruncVecEltTy) {
15071 // Now we only need to compute the offset of the truncated elements.
15072 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15073 unsigned TruncVecNumElts = VT.getVectorNumElements();
15074 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15075
15076 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15077 "Invalid number of elements");
15078
15080 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15081 Opnds.push_back(BuildVect.getOperand(i));
15082
15083 return DAG.getBuildVector(VT, DL, Opnds);
15084 }
15085 }
15086
15087 // fold (truncate (load x)) -> (smaller load x)
15088 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15089 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15090 if (SDValue Reduced = reduceLoadWidth(N))
15091 return Reduced;
15092
15093 // Handle the case where the truncated result is at least as wide as the
15094 // loaded type.
15095 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15096 auto *LN0 = cast<LoadSDNode>(N0);
15097 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15098 SDValue NewLoad = DAG.getExtLoad(
15099 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15100 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15101 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15102 return NewLoad;
15103 }
15104 }
15105 }
15106
15107 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15108 // where ... are all 'undef'.
15109 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15111 SDValue V;
15112 unsigned Idx = 0;
15113 unsigned NumDefs = 0;
15114
15115 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15116 SDValue X = N0.getOperand(i);
15117 if (!X.isUndef()) {
15118 V = X;
15119 Idx = i;
15120 NumDefs++;
15121 }
15122 // Stop if more than one member is non-undef.
15123 if (NumDefs > 1)
15124 break;
15125
15126 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15127 VT.getVectorElementType(),
15128 X.getValueType().getVectorElementCount()));
15129 }
15130
15131 if (NumDefs == 0)
15132 return DAG.getUNDEF(VT);
15133
15134 if (NumDefs == 1) {
15135 assert(V.getNode() && "The single defined operand is empty!");
15136 SmallVector<SDValue, 8> Opnds;
15137 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15138 if (i != Idx) {
15139 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15140 continue;
15141 }
15142 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15143 AddToWorklist(NV.getNode());
15144 Opnds.push_back(NV);
15145 }
15146 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15147 }
15148 }
15149
15150 // Fold truncate of a bitcast of a vector to an extract of the low vector
15151 // element.
15152 //
15153 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15154 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15155 SDValue VecSrc = N0.getOperand(0);
15156 EVT VecSrcVT = VecSrc.getValueType();
15157 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15158 (!LegalOperations ||
15159 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15160 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15161 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15162 DAG.getVectorIdxConstant(Idx, DL));
15163 }
15164 }
15165
15166 // Simplify the operands using demanded-bits information.
15167 if (SimplifyDemandedBits(SDValue(N, 0)))
15168 return SDValue(N, 0);
15169
15170 // fold (truncate (extract_subvector(ext x))) ->
15171 // (extract_subvector x)
15172 // TODO: This can be generalized to cover cases where the truncate and extract
15173 // do not fully cancel each other out.
15174 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15175 SDValue N00 = N0.getOperand(0);
15176 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15177 N00.getOpcode() == ISD::ZERO_EXTEND ||
15178 N00.getOpcode() == ISD::ANY_EXTEND) {
15179 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15180 VT.getVectorElementType())
15181 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15182 N00.getOperand(0), N0.getOperand(1));
15183 }
15184 }
15185
15186 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15187 return NewVSel;
15188
15189 // Narrow a suitable binary operation with a non-opaque constant operand by
15190 // moving it ahead of the truncate. This is limited to pre-legalization
15191 // because targets may prefer a wider type during later combines and invert
15192 // this transform.
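// Illustrative example:
//   (i16 (trunc (i32 add x, 42))) -> (i16 add (i16 (trunc x)), (i16 42))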
15193 switch (N0.getOpcode()) {
15194 case ISD::ADD:
15195 case ISD::SUB:
15196 case ISD::MUL:
15197 case ISD::AND:
15198 case ISD::OR:
15199 case ISD::XOR:
15200 if (!LegalOperations && N0.hasOneUse() &&
15201 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15202 isConstantOrConstantVector(N0.getOperand(1), true))) {
15203 // TODO: We already restricted this to pre-legalization, but for vectors
15204 // we are extra cautious to not create an unsupported operation.
15205 // Target-specific changes are likely needed to avoid regressions here.
15206 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15207 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15208 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15209 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15210 }
15211 }
15212 break;
15213 case ISD::ADDE:
15214 case ISD::UADDO_CARRY:
15215 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15216 // (trunc uaddo_carry(X, Y, Carry)) ->
15217 // (uaddo_carry trunc(X), trunc(Y), Carry)
15218 // When the adde's carry is not used.
15219 // We only do this for uaddo_carry before operation legalization.
15220 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15221 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15222 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15223 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15224 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15225 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15226 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15227 }
15228 break;
15229 case ISD::USUBSAT:
15230 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15231 // enough to know that the upper bits are zero, we must also ensure that we
15232 // don't introduce an extra truncate.
15233 if (!LegalOperations && N0.hasOneUse() &&
15234 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15235 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15236 VT.getScalarSizeInBits() &&
15237 hasOperation(N0.getOpcode(), VT)) {
15238 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15239 DAG, DL);
15240 }
15241 break;
15242 }
15243
15244 return SDValue();
15245}
15246
15247static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15248 SDValue Elt = N->getOperand(i);
15249 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15250 return Elt.getNode();
15251 return Elt.getOperand(Elt.getResNo()).getNode();
15252}
15253
15254/// build_pair (load, load) -> load
15255/// if load locations are consecutive.
15256SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15257 assert(N->getOpcode() == ISD::BUILD_PAIR);
15258
15259 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15260 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15261
15262 // A BUILD_PAIR always has the least significant part in elt 0 and the
15263 // most significant part in elt 1. So when combining into one large load, we
15264 // need to consider the endianness.
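// Illustrative example: for i64 = BUILD_PAIR(lo:i32, hi:i32) on a big-endian
// target, 'hi' lives at the lower address, so the loads are swapped below
// before checking that they are consecutive in memory.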
15265 if (DAG.getDataLayout().isBigEndian())
15266 std::swap(LD1, LD2);
15267
15268 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15269 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15270 LD1->getAddressSpace() != LD2->getAddressSpace())
15271 return SDValue();
15272
15273 unsigned LD1Fast = 0;
15274 EVT LD1VT = LD1->getValueType(0);
15275 unsigned LD1Bytes = LD1VT.getStoreSize();
15276 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15277 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15278 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15279 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15280 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15281 LD1->getPointerInfo(), LD1->getAlign());
15282
15283 return SDValue();
15284}
15285
15286static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15287 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15288 // and Lo parts; on big-endian machines it doesn't.
15289 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15290}
15291
15292SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15293 const TargetLowering &TLI) {
15294 // If this is not a bitcast to an FP type or if the target doesn't have
15295 // IEEE754-compliant FP logic, we're done.
15296 EVT VT = N->getValueType(0);
15297 SDValue N0 = N->getOperand(0);
15298 EVT SourceVT = N0.getValueType();
15299
15300 if (!VT.isFloatingPoint())
15301 return SDValue();
15302
15303 // TODO: Handle cases where the integer constant is a different scalar
15304 // bitwidth to the FP.
15305 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15306 return SDValue();
15307
15308 unsigned FPOpcode;
15309 APInt SignMask;
15310 switch (N0.getOpcode()) {
15311 case ISD::AND:
15312 FPOpcode = ISD::FABS;
15313 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15314 break;
15315 case ISD::XOR:
15316 FPOpcode = ISD::FNEG;
15317 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15318 break;
15319 case ISD::OR:
15320 FPOpcode = ISD::FABS;
15321 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15322 break;
15323 default:
15324 return SDValue();
15325 }
15326
15327 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15328 return SDValue();
15329
15330 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15331 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15332 // removing this would require more changes.
15333 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15334 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15335 return true;
15336
15337 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15338 };
15339
15340 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15341 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15342 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15343 // fneg (fabs X)
15344 SDValue LogicOp0 = N0.getOperand(0);
15345 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15346 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15347 IsBitCastOrFree(LogicOp0, VT)) {
15348 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15349 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15350 NumFPLogicOpsConv++;
15351 if (N0.getOpcode() == ISD::OR)
15352 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15353 return FPOp;
15354 }
15355
15356 return SDValue();
15357}
15358
15359SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15360 SDValue N0 = N->getOperand(0);
15361 EVT VT = N->getValueType(0);
15362
15363 if (N0.isUndef())
15364 return DAG.getUNDEF(VT);
15365
15366 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15367 // Only do this before legalize types, unless both types are integer and the
15368 // scalar type is legal. Only do this before legalize ops, since the target
15369 // may be depending on the bitcast.
15370 // First check to see if this is all constant.
15371 // TODO: Support FP bitcasts after legalize types.
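// Illustrative example: (v2i64 bitcast (v4i32 build_vector C0, C1, C2, C3))
// can be folded here into a v2i64 build_vector holding the repacked constant
// bits, instead of leaving the bitcast for the target to lower.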
15372 if (VT.isVector() &&
15373 (!LegalTypes ||
15374 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15375 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15376 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15377 cast<BuildVectorSDNode>(N0)->isConstant())
15378 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15379 VT.getVectorElementType());
15380
15381 // If the input is a constant, let getNode fold it.
15382 if (isIntOrFPConstant(N0)) {
15383 // If we can't allow illegal operations, we need to check that this is just
15384 // an fp -> int or int -> fp conversion and that the resulting operation will
15385 // be legal.
15386 if (!LegalOperations ||
15387 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15388 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15389 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15390 TLI.isOperationLegal(ISD::Constant, VT))) {
15391 SDValue C = DAG.getBitcast(VT, N0);
15392 if (C.getNode() != N)
15393 return C;
15394 }
15395 }
15396
15397 // (conv (conv x, t1), t2) -> (conv x, t2)
15398 if (N0.getOpcode() == ISD::BITCAST)
15399 return DAG.getBitcast(VT, N0.getOperand(0));
15400
15401 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15402 // iff the current bitwise logicop type isn't legal
15403 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15404 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15405 auto IsFreeBitcast = [VT](SDValue V) {
15406 return (V.getOpcode() == ISD::BITCAST &&
15407 V.getOperand(0).getValueType() == VT) ||
15408 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15409 V->hasOneUse());
15410 };
15411 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15412 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15413 DAG.getBitcast(VT, N0.getOperand(0)),
15414 DAG.getBitcast(VT, N0.getOperand(1)));
15415 }
15416
15417 // fold (conv (load x)) -> (load (conv*)x)
15418 // If the resultant load doesn't need a higher alignment than the original!
15419 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15420 // Do not remove the cast if the types differ in endian layout.
15421 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15422 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15423 // If the load is volatile, we only want to change the load type if the
15424 // resulting load is legal. Otherwise we might increase the number of
15425 // memory accesses. We don't care if the original type was legal or not
15426 // as we assume software couldn't rely on the number of accesses of an
15427 // illegal type.
15428 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15429 TLI.isOperationLegal(ISD::LOAD, VT))) {
15430 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15431
15432 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15433 *LN0->getMemOperand())) {
15434 SDValue Load =
15435 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15436 LN0->getMemOperand());
15437 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15438 return Load;
15439 }
15440 }
15441
15442 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15443 return V;
15444
15445 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15446 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15447 //
15448 // For ppc_fp128:
15449 // fold (bitcast (fneg x)) ->
15450 // flipbit = signbit
15451 // (xor (bitcast x) (build_pair flipbit, flipbit))
15452 //
15453 // fold (bitcast (fabs x)) ->
15454 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15455 // (xor (bitcast x) (build_pair flipbit, flipbit))
15456 // This often reduces constant pool loads.
15457 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15458 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15459 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15460 !N0.getValueType().isVector()) {
15461 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15462 AddToWorklist(NewConv.getNode());
15463
15464 SDLoc DL(N);
15465 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15466 assert(VT.getSizeInBits() == 128);
15467 SDValue SignBit = DAG.getConstant(
15468 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15469 SDValue FlipBit;
15470 if (N0.getOpcode() == ISD::FNEG) {
15471 FlipBit = SignBit;
15472 AddToWorklist(FlipBit.getNode());
15473 } else {
15474 assert(N0.getOpcode() == ISD::FABS);
15475 SDValue Hi =
15476 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15477 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15478 SDLoc(NewConv)));
15479 AddToWorklist(Hi.getNode());
15480 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15481 AddToWorklist(FlipBit.getNode());
15482 }
15483 SDValue FlipBits =
15484 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15485 AddToWorklist(FlipBits.getNode());
15486 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15487 }
15488 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15489 if (N0.getOpcode() == ISD::FNEG)
15490 return DAG.getNode(ISD::XOR, DL, VT,
15491 NewConv, DAG.getConstant(SignBit, DL, VT));
15492 assert(N0.getOpcode() == ISD::FABS);
15493 return DAG.getNode(ISD::AND, DL, VT,
15494 NewConv, DAG.getConstant(~SignBit, DL, VT));
15495 }
15496
15497 // fold (bitconvert (fcopysign cst, x)) ->
15498 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15499 // Note that we don't handle (copysign x, cst) because this can always be
15500 // folded to an fneg or fabs.
15501 //
15502 // For ppc_fp128:
15503 // fold (bitcast (fcopysign cst, x)) ->
15504 // flipbit = (and (extract_element
15505 // (xor (bitcast cst), (bitcast x)), 0),
15506 // signbit)
15507 // (xor (bitcast cst) (build_pair flipbit, flipbit))
15508 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15509 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15510 !VT.isVector()) {
15511 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15512 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15513 if (isTypeLegal(IntXVT)) {
15514 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15515 AddToWorklist(X.getNode());
15516
15517 // If X has a different width than the result/lhs, sext it or truncate it.
15518 unsigned VTWidth = VT.getSizeInBits();
15519 if (OrigXWidth < VTWidth) {
15520 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15521 AddToWorklist(X.getNode());
15522 } else if (OrigXWidth > VTWidth) {
15523 // To get the sign bit in the right place, we have to shift it right
15524 // before truncating.
15525 SDLoc DL(X);
15526 X = DAG.getNode(ISD::SRL, DL,
15527 X.getValueType(), X,
15528 DAG.getConstant(OrigXWidth-VTWidth, DL,
15529 X.getValueType()));
15530 AddToWorklist(X.getNode());
15531 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15532 AddToWorklist(X.getNode());
15533 }
15534
15535 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15536 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15537 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15538 AddToWorklist(Cst.getNode());
15539 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15540 AddToWorklist(X.getNode());
15541 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15542 AddToWorklist(XorResult.getNode());
15543 SDValue XorResult64 = DAG.getNode(
15544 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15545 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15546 SDLoc(XorResult)));
15547 AddToWorklist(XorResult64.getNode());
15548 SDValue FlipBit =
15549 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15550 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15551 AddToWorklist(FlipBit.getNode());
15552 SDValue FlipBits =
15553 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15554 AddToWorklist(FlipBits.getNode());
15555 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15556 }
15557 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15558 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15559 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15560 AddToWorklist(X.getNode());
15561
15562 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15563 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15564 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15565 AddToWorklist(Cst.getNode());
15566
15567 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15568 }
15569 }
15570
15571 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15572 if (N0.getOpcode() == ISD::BUILD_PAIR)
15573 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15574 return CombineLD;
15575
15576 // Remove double bitcasts from shuffles - this is often a legacy of
15577 // XformToShuffleWithZero being used to combine bitmaskings (of
15578 // float vectors bitcast to integer vectors) into shuffles.
15579 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
15580 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15581 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15582 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15583 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15584 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15585
15586 // If operands are a bitcast, peek through if it casts the original VT.
15587 // If operands are a constant, just bitcast back to original VT.
15588 auto PeekThroughBitcast = [&](SDValue Op) {
15589 if (Op.getOpcode() == ISD::BITCAST &&
15590 Op.getOperand(0).getValueType() == VT)
15591 return SDValue(Op.getOperand(0));
15592 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15593 return DAG.getBitcast(VT, Op);
15594 return SDValue();
15595 };
15596
15597 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15598 // the result type of this bitcast. This would eliminate at least one
15599 // bitcast. See the transform in InstCombine.
15600 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15601 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15602 if (!(SV0 && SV1))
15603 return SDValue();
15604
15605 int MaskScale =
15606 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15607 SmallVector<int, 8> NewMask;
15608 for (int M : SVN->getMask())
15609 for (int i = 0; i != MaskScale; ++i)
15610 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15611
15612 SDValue LegalShuffle =
15613 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15614 if (LegalShuffle)
15615 return LegalShuffle;
15616 }
15617
15618 return SDValue();
15619}
15620
15621SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15622 EVT VT = N->getValueType(0);
15623 return CombineConsecutiveLoads(N, VT);
15624}
15625
15626SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15627 SDValue N0 = N->getOperand(0);
15628
15629 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15630 return N0;
15631
15632 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15633 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15634 // example https://reviews.llvm.org/D136529#4120959.
15635 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15636 return SDValue();
15637
15638 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15639 // Try to push freeze through instructions that propagate but don't produce
15640 // poison as far as possible. If an operand of freeze satisfies three
15641 // conditions: 1) one-use, 2) does not produce poison, and 3) has all but one
15642 // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar), then push
15643 // the freeze through to the operands that are not guaranteed non-poison.
15644 // NOTE: we will strip poison-generating flags, so ignore them here.
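// Illustrative example: freeze (add nuw x, 1) becomes (add (freeze x), 1),
// with the nuw flag dropped since the flag itself could introduce poison.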
15645 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15646 /*ConsiderFlags*/ false) ||
15647 N0->getNumValues() != 1 || !N0->hasOneUse())
15648 return SDValue();
15649
15650 bool AllowMultipleMaybePoisonOperands =
15651 N0.getOpcode() == ISD::SELECT_CC ||
15652 N0.getOpcode() == ISD::SETCC ||
15653 N0.getOpcode() == ISD::BUILD_VECTOR ||
15654 N0.getOpcode() == ISD::BUILD_PAIR ||
15655 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15656 N0.getOpcode() == ISD::CONCAT_VECTORS;
15657
15658 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15659 // ones" or "constant" into something that depends on FrozenUndef. We can
15660 // instead pick undef values to keep those properties, while at the same time
15661 // folding away the freeze.
15662 // If we implement a more general solution for folding away freeze(undef) in
15663 // the future, then this special handling can be removed.
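// Illustrative example: freeze (build_vector 0, undef) is folded directly to
// (build_vector 0, 0) rather than freezing the undef element.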
15664 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15665 SDLoc DL(N0);
15666 EVT VT = N0.getValueType();
15667 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15668 return DAG.getAllOnesConstant(DL, VT);
15669 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15670 SmallVector<SDValue, 8> NewVecC;
15671 for (const SDValue &Op : N0->op_values())
15672 NewVecC.push_back(
15673 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15674 return DAG.getBuildVector(VT, DL, NewVecC);
15675 }
15676 }
15677
15678 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15679 for (SDValue Op : N0->ops()) {
15680 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15681 /*Depth*/ 1))
15682 continue;
15683 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15684 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15685 if (!HadMaybePoisonOperands)
15686 continue;
15687 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15688 // Multiple maybe-poison ops when not allowed - bail out.
15689 return SDValue();
15690 }
15691 }
15692 // NOTE: the whole op may still not be guaranteed to be free of undef or
15693 // poison, because it could create undef or poison due to its
15694 // poison-generating flags. So not finding any maybe-poison operands is fine.
15694 // So not finding any maybe-poison operands is fine.
15695
15696 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15697 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15698 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15699 continue;
15700 // First, freeze each offending operand.
15701 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15702 // Then, change all other uses of unfrozen operand to use frozen operand.
15703 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15704 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15705 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15706 // But, that also updated the use in the freeze we just created, thus
15707 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15708 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15709 MaybePoisonOperand);
15710 }
15711 }
15712
15713 // This node has been merged with another.
15714 if (N->getOpcode() == ISD::DELETED_NODE)
15715 return SDValue(N, 0);
15716
15717 // The whole node may have been updated, so the value we were holding
15718 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15719 N0 = N->getOperand(0);
15720
15721 // Finally, recreate the node; its operands were updated to use
15722 // frozen operands, so we just need to use its "original" operands.
15723 SmallVector<SDValue> Ops(N0->ops());
15724 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
15725 for (SDValue &Op : Ops) {
15726 if (Op.getOpcode() == ISD::UNDEF)
15727 Op = DAG.getFreeze(Op);
15728 }
15729
15730 SDValue R;
15731 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15732 // Special case handling for ShuffleVectorSDNode nodes.
15733 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15734 SVN->getMask());
15735 } else {
15736 // NOTE: this strips poison generating flags.
15737 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15738 }
15739 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15740 "Can't create node that may be undef/poison!");
15741 return R;
15742}
15743
15744/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15745/// operands. DstEltVT indicates the destination element value type.
15746SDValue DAGCombiner::
15747ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15748 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15749
15750 // If this is already the right type, we're done.
15751 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15752
15753 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15754 unsigned DstBitSize = DstEltVT.getSizeInBits();
15755
15756 // If this is a conversion of N elements of one type to N elements of another
15757 // type, convert each element. This handles FP<->INT cases.
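// Illustrative example: (v2f32 build_vector 1.0f, 2.0f) bitcast to v2i32
// becomes (v2i32 build_vector 0x3F800000, 0x40000000), one element at a time.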
15758 if (SrcBitSize == DstBitSize) {
15759 SmallVector<SDValue, 8> Ops;
15760 for (SDValue Op : BV->op_values()) {
15761 // If the vector element type is not legal, the BUILD_VECTOR operands
15762 // are promoted and implicitly truncated. Make that explicit here.
15763 if (Op.getValueType() != SrcEltVT)
15764 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15765 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15766 AddToWorklist(Ops.back().getNode());
15767 }
15768 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15769 BV->getValueType(0).getVectorNumElements());
15770 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15771 }
15772
15773 // Otherwise, we're growing or shrinking the elements. To avoid having to
15774 // handle annoying details of growing/shrinking FP values, we convert them to
15775 // int first.
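// Illustrative example: going from v4i16 constants to v2i32, each pair of i16
// elements is repacked (endian-aware) into the raw bits of one i32 element.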
15776 if (SrcEltVT.isFloatingPoint()) {
15777 // Convert the input float vector to an int vector where the elements are the
15778 // same size.
15779 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15780 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15781 SrcEltVT = IntVT;
15782 }
15783
15784 // Now we know the input is an integer vector. If the output is a FP type,
15785 // convert to integer first, then to FP of the right size.
15786 if (DstEltVT.isFloatingPoint()) {
15787 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15788 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15789
15790 // Next, convert to FP elements of the same size.
15791 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15792 }
15793
15794 // Okay, we know the src/dst types are both integers of differing types.
15795 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15796
15797 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15798 // BuildVectorSDNode?
15799 auto *BVN = cast<BuildVectorSDNode>(BV);
15800
15801 // Extract the constant raw bit data.
15802 BitVector UndefElements;
15803 SmallVector<APInt> RawBits;
15804 bool IsLE = DAG.getDataLayout().isLittleEndian();
15805 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15806 return SDValue();
15807
15808 SDLoc DL(BV);
15809 SmallVector<SDValue, 8> Ops;
15810 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15811 if (UndefElements[I])
15812 Ops.push_back(DAG.getUNDEF(DstEltVT));
15813 else
15814 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15815 }
15816
15817 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15818 return DAG.getBuildVector(VT, DL, Ops);
15819}
15820
15821// Returns true if floating point contraction is allowed on the FMUL-SDValue
15822// `N`
15823 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15824 assert(N.getOpcode() == ISD::FMUL);
15825
15826 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15827 N->getFlags().hasAllowContract();
15828}
15829
15830// Returns true if `N` can assume no infinities involved in its computation.
15831 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15832 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15833}
15834
15835/// Try to perform FMA combining on a given FADD node.
15836template <class MatchContextClass>
15837SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15838 SDValue N0 = N->getOperand(0);
15839 SDValue N1 = N->getOperand(1);
15840 EVT VT = N->getValueType(0);
15841 SDLoc SL(N);
15842 MatchContextClass matcher(DAG, TLI, N);
15843 const TargetOptions &Options = DAG.getTarget().Options;
15844
15845 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15846
15847 // Floating-point multiply-add with intermediate rounding.
15848 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15849 // FIXME: Add VP_FMAD opcode.
15850 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15851
15852 // Floating-point multiply-add without intermediate rounding.
15853 bool HasFMA =
15854 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15855 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15856
15857 // No valid opcode, do not combine.
15858 if (!HasFMAD && !HasFMA)
15859 return SDValue();
15860
15861 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15862 Options.UnsafeFPMath || HasFMAD);
15863 // If the addition is not contractable, do not combine.
15864 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15865 return SDValue();
15866
15867 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15868 // beneficial. It does not reduce latency. It increases register pressure. It
15869 // replaces an fadd with an fma, which is a more complex instruction, so it is
15870 // likely to have a larger encoding, use more functional units, etc.
15871 if (N0 == N1)
15872 return SDValue();
15873
15874 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15875 return SDValue();
15876
15877 // Always prefer FMAD to FMA for precision.
15878 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15879 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15880
15881 auto isFusedOp = [&](SDValue N) {
15882 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15883 };
15884
15885 // Is the node an FMUL and contractable either due to global flags or
15886 // SDNodeFlags.
15887 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15888 if (!matcher.match(N, ISD::FMUL))
15889 return false;
15890 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15891 };
15892 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15893 // prefer to fold the multiply with fewer uses.
15894 if (isContractableFMUL(N0) && isContractableFMUL(N1)) {
15895 if (N0->use_size() > N1->use_size())
15896 std::swap(N0, N1);
15897 }
15898
15899 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15900 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15901 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15902 N0.getOperand(1), N1);
15903 }
15904
15905 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15906 // Note: Commutes FADD operands.
15907 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15908 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15909 N1.getOperand(1), N0);
15910 }
15911
15912 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15913 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15914 // This also works with nested fma instructions:
15915 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15916 // fma A, B, (fma C, D, fma (E, F, G))
15917 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15918 // fma A, B, (fma C, D, fma (E, F, G)).
15919 // This requires reassociation because it changes the order of operations.
15920 bool CanReassociate =
15921 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15922 if (CanReassociate) {
15923 SDValue FMA, E;
15924 if (isFusedOp(N0) && N0.hasOneUse()) {
15925 FMA = N0;
15926 E = N1;
15927 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15928 FMA = N1;
15929 E = N0;
15930 }
15931
15932 SDValue TmpFMA = FMA;
15933 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15934 SDValue FMul = TmpFMA->getOperand(2);
15935 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15936 SDValue C = FMul.getOperand(0);
15937 SDValue D = FMul.getOperand(1);
15938 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15939 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15940 // Replacing the inner FMul could cause the outer FMA to be simplified
15941 // away.
15942 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15943 }
15944
15945 TmpFMA = TmpFMA->getOperand(2);
15946 }
15947 }
15948
15949 // Look through FP_EXTEND nodes to do more combining.
15950
15951 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15952 if (matcher.match(N0, ISD::FP_EXTEND)) {
15953 SDValue N00 = N0.getOperand(0);
15954 if (isContractableFMUL(N00) &&
15955 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15956 N00.getValueType())) {
15957 return matcher.getNode(
15958 PreferredFusedOpcode, SL, VT,
15959 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15960 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15961 }
15962 }
15963
15964 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15965 // Note: Commutes FADD operands.
15966 if (matcher.match(N1, ISD::FP_EXTEND)) {
15967 SDValue N10 = N1.getOperand(0);
15968 if (isContractableFMUL(N10) &&
15969 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15970 N10.getValueType())) {
15971 return matcher.getNode(
15972 PreferredFusedOpcode, SL, VT,
15973 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15974 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15975 }
15976 }
15977
15978 // More folding opportunities when target permits.
15979 if (Aggressive) {
15980 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15981 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15982 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15983 SDValue Z) {
15984 return matcher.getNode(
15985 PreferredFusedOpcode, SL, VT, X, Y,
15986 matcher.getNode(PreferredFusedOpcode, SL, VT,
15987 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15988 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15989 };
15990 if (isFusedOp(N0)) {
15991 SDValue N02 = N0.getOperand(2);
15992 if (matcher.match(N02, ISD::FP_EXTEND)) {
15993 SDValue N020 = N02.getOperand(0);
15994 if (isContractableFMUL(N020) &&
15995 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15996 N020.getValueType())) {
15997 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15998 N020.getOperand(0), N020.getOperand(1),
15999 N1);
16000 }
16001 }
16002 }
16003
16004 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
16005 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
16006 // FIXME: This turns two single-precision and one double-precision
16007 // operation into two double-precision operations, which might not be
16008 // interesting for all targets, especially GPUs.
16009 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
16010 SDValue Z) {
16011 return matcher.getNode(
16012 PreferredFusedOpcode, SL, VT,
16013 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
16014 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
16015 matcher.getNode(PreferredFusedOpcode, SL, VT,
16016 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
16017 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
16018 };
16019 if (N0.getOpcode() == ISD::FP_EXTEND) {
16020 SDValue N00 = N0.getOperand(0);
16021 if (isFusedOp(N00)) {
16022 SDValue N002 = N00.getOperand(2);
16023 if (isContractableFMUL(N002) &&
16024 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16025 N00.getValueType())) {
16026 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
16027 N002.getOperand(0), N002.getOperand(1),
16028 N1);
16029 }
16030 }
16031 }
16032
16033 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
16034 // -> (fma y, z, (fma (fpext u), (fpext v), x))
16035 if (isFusedOp(N1)) {
16036 SDValue N12 = N1.getOperand(2);
16037 if (N12.getOpcode() == ISD::FP_EXTEND) {
16038 SDValue N120 = N12.getOperand(0);
16039 if (isContractableFMUL(N120) &&
16040 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16041 N120.getValueType())) {
16042 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
16043 N120.getOperand(0), N120.getOperand(1),
16044 N0);
16045 }
16046 }
16047 }
16048
16049 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
16050 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
16051 // FIXME: This turns two single-precision and one double-precision
16052 // operation into two double-precision operations, which might not be
16053 // interesting for all targets, especially GPUs.
16054 if (N1.getOpcode() == ISD::FP_EXTEND) {
16055 SDValue N10 = N1.getOperand(0);
16056 if (isFusedOp(N10)) {
16057 SDValue N102 = N10.getOperand(2);
16058 if (isContractableFMUL(N102) &&
16059 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16060 N10.getValueType())) {
16061 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
16062 N102.getOperand(0), N102.getOperand(1),
16063 N0);
16064 }
16065 }
16066 }
16067 }
16068
16069 return SDValue();
16070}
16071
16072/// Try to perform FMA combining on a given FSUB node.
16073template <class MatchContextClass>
16074SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16075 SDValue N0 = N->getOperand(0);
16076 SDValue N1 = N->getOperand(1);
16077 EVT VT = N->getValueType(0);
16078 SDLoc SL(N);
16079 MatchContextClass matcher(DAG, TLI, N);
16080 const TargetOptions &Options = DAG.getTarget().Options;
16081
16082 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16083
16084 // Floating-point multiply-add with intermediate rounding.
16085 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16086 // FIXME: Add VP_FMAD opcode.
16087 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16088
16089 // Floating-point multiply-add without intermediate rounding.
16090 bool HasFMA =
16091 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16092 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
16093
16094 // No valid opcode, do not combine.
16095 if (!HasFMAD && !HasFMA)
16096 return SDValue();
16097
16098 const SDNodeFlags Flags = N->getFlags();
16099 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16100 Options.UnsafeFPMath || HasFMAD);
16101
16102 // If the subtraction is not contractable, do not combine.
16103 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16104 return SDValue();
16105
16106 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16107 return SDValue();
16108
16109 // Always prefer FMAD to FMA for precision.
16110 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16111 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16112 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16113
16114 // Is the node an FMUL and contractable either due to global flags or
16115 // SDNodeFlags.
16116 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16117 if (!matcher.match(N, ISD::FMUL))
16118 return false;
16119 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16120 };
16121
16122 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16123 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16124 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16125 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16126 XY.getOperand(1),
16127 matcher.getNode(ISD::FNEG, SL, VT, Z));
16128 }
16129 return SDValue();
16130 };
16131
16132 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16133 // Note: Commutes FSUB operands.
16134 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16135 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16136 return matcher.getNode(
16137 PreferredFusedOpcode, SL, VT,
16138 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16139 YZ.getOperand(1), X);
16140 }
16141 return SDValue();
16142 };
16143
16144 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16145 // prefer to fold the multiply with fewer uses.
16146 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16147 (N0->use_size() > N1->use_size())) {
16148 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16149 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16150 return V;
16151 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16152 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16153 return V;
16154 } else {
16155 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16156 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16157 return V;
16158 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16159 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16160 return V;
16161 }
16162
16163 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16164 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16165 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16166 SDValue N00 = N0.getOperand(0).getOperand(0);
16167 SDValue N01 = N0.getOperand(0).getOperand(1);
16168 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16169 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16170 matcher.getNode(ISD::FNEG, SL, VT, N1));
16171 }
16172
16173 // Look through FP_EXTEND nodes to do more combining.
16174
16175 // fold (fsub (fpext (fmul x, y)), z)
16176 // -> (fma (fpext x), (fpext y), (fneg z))
16177 if (matcher.match(N0, ISD::FP_EXTEND)) {
16178 SDValue N00 = N0.getOperand(0);
16179 if (isContractableFMUL(N00) &&
16180 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16181 N00.getValueType())) {
16182 return matcher.getNode(
16183 PreferredFusedOpcode, SL, VT,
16184 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16185 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16186 matcher.getNode(ISD::FNEG, SL, VT, N1));
16187 }
16188 }
16189
16190 // fold (fsub x, (fpext (fmul y, z)))
16191 // -> (fma (fneg (fpext y)), (fpext z), x)
16192 // Note: Commutes FSUB operands.
16193 if (matcher.match(N1, ISD::FP_EXTEND)) {
16194 SDValue N10 = N1.getOperand(0);
16195 if (isContractableFMUL(N10) &&
16196 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16197 N10.getValueType())) {
16198 return matcher.getNode(
16199 PreferredFusedOpcode, SL, VT,
16200 matcher.getNode(
16201 ISD::FNEG, SL, VT,
16202 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16203 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16204 }
16205 }
16206
16207 // fold (fsub (fpext (fneg (fmul x, y))), z)
16208 // -> (fneg (fma (fpext x), (fpext y), z))
16209 // Note: This could be removed with appropriate canonicalization of the
16210 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16211 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16212 // us from implementing the canonicalization in visitFSUB.
16213 if (matcher.match(N0, ISD::FP_EXTEND)) {
16214 SDValue N00 = N0.getOperand(0);
16215 if (matcher.match(N00, ISD::FNEG)) {
16216 SDValue N000 = N00.getOperand(0);
16217 if (isContractableFMUL(N000) &&
16218 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16219 N00.getValueType())) {
16220 return matcher.getNode(
16221 ISD::FNEG, SL, VT,
16222 matcher.getNode(
16223 PreferredFusedOpcode, SL, VT,
16224 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16225 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16226 N1));
16227 }
16228 }
16229 }
16230
16231 // fold (fsub (fneg (fpext (fmul x, y))), z)
16232 // -> (fneg (fma (fpext x), (fpext y), z))
16233 // Note: This could be removed with appropriate canonicalization of the
16234 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16235 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16236 // us from implementing the canonicalization in visitFSUB.
16237 if (matcher.match(N0, ISD::FNEG)) {
16238 SDValue N00 = N0.getOperand(0);
16239 if (matcher.match(N00, ISD::FP_EXTEND)) {
16240 SDValue N000 = N00.getOperand(0);
16241 if (isContractableFMUL(N000) &&
16242 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16243 N000.getValueType())) {
16244 return matcher.getNode(
16245 ISD::FNEG, SL, VT,
16246 matcher.getNode(
16247 PreferredFusedOpcode, SL, VT,
16248 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16249 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16250 N1));
16251 }
16252 }
16253 }
16254
16255 auto isReassociable = [&Options](SDNode *N) {
16256 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16257 };
16258
16259 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16260 &isReassociable](SDValue N) {
16261 return isContractableFMUL(N) && isReassociable(N.getNode());
16262 };
16263
16264 auto isFusedOp = [&](SDValue N) {
16265 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16266 };
16267
16268 // More folding opportunities when target permits.
16269 if (Aggressive && isReassociable(N)) {
16270 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16271 // fold (fsub (fma x, y, (fmul u, v)), z)
16272 // -> (fma x, y (fma u, v, (fneg z)))
16273 if (CanFuse && isFusedOp(N0) &&
16274 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16275 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16276 return matcher.getNode(
16277 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16278 matcher.getNode(PreferredFusedOpcode, SL, VT,
16279 N0.getOperand(2).getOperand(0),
16280 N0.getOperand(2).getOperand(1),
16281 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16282 }
16283
16284 // fold (fsub x, (fma y, z, (fmul u, v)))
16285 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16286 if (CanFuse && isFusedOp(N1) &&
16287 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16288 N1->hasOneUse() && NoSignedZero) {
16289 SDValue N20 = N1.getOperand(2).getOperand(0);
16290 SDValue N21 = N1.getOperand(2).getOperand(1);
16291 return matcher.getNode(
16292 PreferredFusedOpcode, SL, VT,
16293 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16294 N1.getOperand(1),
16295 matcher.getNode(PreferredFusedOpcode, SL, VT,
16296 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16297 }
16298
16299 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16300 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16301 if (isFusedOp(N0) && N0->hasOneUse()) {
16302 SDValue N02 = N0.getOperand(2);
16303 if (matcher.match(N02, ISD::FP_EXTEND)) {
16304 SDValue N020 = N02.getOperand(0);
16305 if (isContractableAndReassociableFMUL(N020) &&
16306 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16307 N020.getValueType())) {
16308 return matcher.getNode(
16309 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16310 matcher.getNode(
16311 PreferredFusedOpcode, SL, VT,
16312 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16313 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16314 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16315 }
16316 }
16317 }
16318
16319 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16320 // -> (fma (fpext x), (fpext y),
16321 // (fma (fpext u), (fpext v), (fneg z)))
16322 // FIXME: This turns two single-precision and one double-precision
16323 // operation into two double-precision operations, which might not be
16324 // interesting for all targets, especially GPUs.
16325 if (matcher.match(N0, ISD::FP_EXTEND)) {
16326 SDValue N00 = N0.getOperand(0);
16327 if (isFusedOp(N00)) {
16328 SDValue N002 = N00.getOperand(2);
16329 if (isContractableAndReassociableFMUL(N002) &&
16330 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16331 N00.getValueType())) {
16332 return matcher.getNode(
16333 PreferredFusedOpcode, SL, VT,
16334 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16335 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16336 matcher.getNode(
16337 PreferredFusedOpcode, SL, VT,
16338 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16339 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16340 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16341 }
16342 }
16343 }
16344
16345 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16346 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16347 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16348 N1->hasOneUse()) {
16349 SDValue N120 = N1.getOperand(2).getOperand(0);
16350 if (isContractableAndReassociableFMUL(N120) &&
16351 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16352 N120.getValueType())) {
16353 SDValue N1200 = N120.getOperand(0);
16354 SDValue N1201 = N120.getOperand(1);
16355 return matcher.getNode(
16356 PreferredFusedOpcode, SL, VT,
16357 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16358 N1.getOperand(1),
16359 matcher.getNode(
16360 PreferredFusedOpcode, SL, VT,
16361 matcher.getNode(ISD::FNEG, SL, VT,
16362 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16363 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16364 }
16365 }
16366
16367 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16368 // -> (fma (fneg (fpext y)), (fpext z),
16369 // (fma (fneg (fpext u)), (fpext v), x))
16370 // FIXME: This turns two single-precision and one double-precision
16371 // operation into two double-precision operations, which might not be
16372 // interesting for all targets, especially GPUs.
16373 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16374 SDValue CvtSrc = N1.getOperand(0);
16375 SDValue N100 = CvtSrc.getOperand(0);
16376 SDValue N101 = CvtSrc.getOperand(1);
16377 SDValue N102 = CvtSrc.getOperand(2);
16378 if (isContractableAndReassociableFMUL(N102) &&
16379 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16380 CvtSrc.getValueType())) {
16381 SDValue N1020 = N102.getOperand(0);
16382 SDValue N1021 = N102.getOperand(1);
16383 return matcher.getNode(
16384 PreferredFusedOpcode, SL, VT,
16385 matcher.getNode(ISD::FNEG, SL, VT,
16386 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16387 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16388 matcher.getNode(
16389 PreferredFusedOpcode, SL, VT,
16390 matcher.getNode(ISD::FNEG, SL, VT,
16391 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16392 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16393 }
16394 }
16395 }
16396
16397 return SDValue();
16398}
16399
16400/// Try to perform FMA combining on a given FMUL node based on the distributive
16401/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16402/// subtraction instead of addition).
16403SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16404 SDValue N0 = N->getOperand(0);
16405 SDValue N1 = N->getOperand(1);
16406 EVT VT = N->getValueType(0);
16407 SDLoc SL(N);
16408
16409 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16410
16411 const TargetOptions &Options = DAG.getTarget().Options;
16412
16413 // The transforms below are incorrect when x == 0 and y == inf, because the
16414 // intermediate multiplication produces a nan.
16415 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16416 if (!hasNoInfs(Options, FAdd))
16417 return SDValue();
16418
16419 // Floating-point multiply-add without intermediate rounding.
16420 bool HasFMA =
16421 isContractableFMUL(Options, SDValue(N, 0)) &&
16422 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16423 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16424
16425 // Floating-point multiply-add with intermediate rounding. This can result
16426 // in a less precise result due to the changed rounding order.
16427 bool HasFMAD = Options.UnsafeFPMath &&
16428 (LegalOperations && TLI.isFMADLegal(DAG, N));
16429
16430 // No valid opcode, do not combine.
16431 if (!HasFMAD && !HasFMA)
16432 return SDValue();
16433
16434 // Always prefer FMAD to FMA for precision.
16435 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16436 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16437
16438 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16439 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16440 auto FuseFADD = [&](SDValue X, SDValue Y) {
16441 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16442 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16443 if (C->isExactlyValue(+1.0))
16444 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16445 Y);
16446 if (C->isExactlyValue(-1.0))
16447 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16448 DAG.getNode(ISD::FNEG, SL, VT, Y));
16449 }
16450 }
16451 return SDValue();
16452 };
16453
16454 if (SDValue FMA = FuseFADD(N0, N1))
16455 return FMA;
16456 if (SDValue FMA = FuseFADD(N1, N0))
16457 return FMA;
16458
16459 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16460 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16461 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16462 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16463 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16464 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16465 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16466 if (C0->isExactlyValue(+1.0))
16467 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16468 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16469 Y);
16470 if (C0->isExactlyValue(-1.0))
16471 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16472 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16473 DAG.getNode(ISD::FNEG, SL, VT, Y));
16474 }
16475 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16476 if (C1->isExactlyValue(+1.0))
16477 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16478 DAG.getNode(ISD::FNEG, SL, VT, Y));
16479 if (C1->isExactlyValue(-1.0))
16480 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16481 Y);
16482 }
16483 }
16484 return SDValue();
16485 };
16486
16487 if (SDValue FMA = FuseFSUB(N0, N1))
16488 return FMA;
16489 if (SDValue FMA = FuseFSUB(N1, N0))
16490 return FMA;
16491
16492 return SDValue();
16493}
16494
16495SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16496 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16497
16498 // FADD -> FMA combines:
16499 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16500 if (Fused.getOpcode() != ISD::DELETED_NODE)
16501 AddToWorklist(Fused.getNode());
16502 return Fused;
16503 }
16504 return SDValue();
16505}
16506
16507SDValue DAGCombiner::visitFADD(SDNode *N) {
16508 SDValue N0 = N->getOperand(0);
16509 SDValue N1 = N->getOperand(1);
16510 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16511 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16512 EVT VT = N->getValueType(0);
16513 SDLoc DL(N);
16514 const TargetOptions &Options = DAG.getTarget().Options;
16515 SDNodeFlags Flags = N->getFlags();
16516 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16517
16518 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16519 return R;
16520
16521 // fold (fadd c1, c2) -> c1 + c2
16522 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16523 return C;
16524
16525 // canonicalize constant to RHS
16526 if (N0CFP && !N1CFP)
16527 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16528
16529 // fold vector ops
16530 if (VT.isVector())
16531 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16532 return FoldedVOp;
16533
16534 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16535 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16536 if (N1C && N1C->isZero())
16537 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16538 return N0;
16539
16540 if (SDValue NewSel = foldBinOpIntoSelect(N))
16541 return NewSel;
16542
16543 // fold (fadd A, (fneg B)) -> (fsub A, B)
16544 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16545 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16546 N1, DAG, LegalOperations, ForCodeSize))
16547 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16548
16549 // fold (fadd (fneg A), B) -> (fsub B, A)
16550 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16551 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16552 N0, DAG, LegalOperations, ForCodeSize))
16553 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16554
16555 auto isFMulNegTwo = [](SDValue FMul) {
16556 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16557 return false;
16558 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16559 return C && C->isExactlyValue(-2.0);
16560 };
16561
16562 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16563 if (isFMulNegTwo(N0)) {
16564 SDValue B = N0.getOperand(0);
16565 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16566 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16567 }
16568 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16569 if (isFMulNegTwo(N1)) {
16570 SDValue B = N1.getOperand(0);
16571 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16572 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16573 }
16574
16575 // No FP constant should be created after legalization as Instruction
16576 // Selection pass has a hard time dealing with FP constants.
16577 bool AllowNewConst = (Level < AfterLegalizeDAG);
16578
16579 // If nnan is enabled, fold lots of things.
16580 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16581 // If allowed, fold (fadd (fneg x), x) -> 0.0
16582 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16583 return DAG.getConstantFP(0.0, DL, VT);
16584
16585 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16586 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16587 return DAG.getConstantFP(0.0, DL, VT);
16588 }
16589
16590 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16591 // TODO: break out portions of the transformations below for which Unsafe is
16592 // considered and which do not require both nsz and reassoc
16593 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16594 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16595 AllowNewConst) {
16596 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16597 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16598 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16599 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16600 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16601 }
16602
16603 // We can fold chains of FADD's of the same value into multiplications.
16604 // This transform is not safe in general because we are reducing the number
16605 // of rounding steps.
16606 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16607 if (N0.getOpcode() == ISD::FMUL) {
16608 SDNode *CFP00 =
16609 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16610 SDNode *CFP01 =
16611 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16612
16613 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16614 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16615 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16616 DAG.getConstantFP(1.0, DL, VT));
16617 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16618 }
16619
16620 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16621 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16622 N1.getOperand(0) == N1.getOperand(1) &&
16623 N0.getOperand(0) == N1.getOperand(0)) {
16624 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16625 DAG.getConstantFP(2.0, DL, VT));
16626 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16627 }
16628 }
16629
16630 if (N1.getOpcode() == ISD::FMUL) {
16631 SDNode *CFP10 =
16632 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16633 SDNode *CFP11 =
16634 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16635
16636 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16637 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16638 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16639 DAG.getConstantFP(1.0, DL, VT));
16640 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16641 }
16642
16643 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16644 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16645 N0.getOperand(0) == N0.getOperand(1) &&
16646 N1.getOperand(0) == N0.getOperand(0)) {
16647 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16648 DAG.getConstantFP(2.0, DL, VT));
16649 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16650 }
16651 }
16652
16653 if (N0.getOpcode() == ISD::FADD) {
16654 SDNode *CFP00 =
16655 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16656 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16657 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16658 (N0.getOperand(0) == N1)) {
16659 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16660 DAG.getConstantFP(3.0, DL, VT));
16661 }
16662 }
16663
16664 if (N1.getOpcode() == ISD::FADD) {
16665 SDNode *CFP10 =
16666 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16667 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16668 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16669 N1.getOperand(0) == N0) {
16670 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16671 DAG.getConstantFP(3.0, DL, VT));
16672 }
16673 }
16674
16675 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16676 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16677 N0.getOperand(0) == N0.getOperand(1) &&
16678 N1.getOperand(0) == N1.getOperand(1) &&
16679 N0.getOperand(0) == N1.getOperand(0)) {
16680 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16681 DAG.getConstantFP(4.0, DL, VT));
16682 }
16683 }
16684
16685 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16686 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16687 VT, N0, N1, Flags))
16688 return SD;
16689 } // enable-unsafe-fp-math
16690
16691 // FADD -> FMA combines:
16692 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16693 if (Fused.getOpcode() != ISD::DELETED_NODE)
16694 AddToWorklist(Fused.getNode());
16695 return Fused;
16696 }
16697 return SDValue();
16698}
16699
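Editor's note (illustrative only, not part of DAGCombiner): the reassociation folds in visitFADD above turn chains of additions of the same value into a multiply, e.g. (x + x) + (x + x) into 4.0 * x. A small sketch of that identity:

#include <cstdio>

int main() {
  float x = 1.3f;
  float chained = (x + x) + (x + x); // chain of fadds of the same value
  float folded = 4.0f * x;           // form produced by the combine
  // Reassociating FP adds is not exact in general, hence the reassoc/nsz
  // (or unsafe-math) requirement; doubling itself, however, is exact.
  std::printf("%a %a\n", chained, folded);
  return 0;
}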
16700SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16701 SDValue Chain = N->getOperand(0);
16702 SDValue N0 = N->getOperand(1);
16703 SDValue N1 = N->getOperand(2);
16704 EVT VT = N->getValueType(0);
16705 EVT ChainVT = N->getValueType(1);
16706 SDLoc DL(N);
16707 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16708
16709 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16710 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16711 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16712 N1, DAG, LegalOperations, ForCodeSize)) {
16713 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16714 {Chain, N0, NegN1});
16715 }
16716
16717 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16718 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16719 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16720 N0, DAG, LegalOperations, ForCodeSize)) {
16721 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16722 {Chain, N1, NegN0});
16723 }
16724 return SDValue();
16725}
16726
16727SDValue DAGCombiner::visitFSUB(SDNode *N) {
16728 SDValue N0 = N->getOperand(0);
16729 SDValue N1 = N->getOperand(1);
16730 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16731 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16732 EVT VT = N->getValueType(0);
16733 SDLoc DL(N);
16734 const TargetOptions &Options = DAG.getTarget().Options;
16735 const SDNodeFlags Flags = N->getFlags();
16736 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16737
16738 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16739 return R;
16740
16741 // fold (fsub c1, c2) -> c1-c2
16742 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16743 return C;
16744
16745 // fold vector ops
16746 if (VT.isVector())
16747 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16748 return FoldedVOp;
16749
16750 if (SDValue NewSel = foldBinOpIntoSelect(N))
16751 return NewSel;
16752
16753 // (fsub A, 0) -> A
16754 if (N1CFP && N1CFP->isZero()) {
16755 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16756 Flags.hasNoSignedZeros()) {
16757 return N0;
16758 }
16759 }
16760
16761 if (N0 == N1) {
16762 // (fsub x, x) -> 0.0
16763 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16764 return DAG.getConstantFP(0.0f, DL, VT);
16765 }
16766
16767 // (fsub -0.0, N1) -> -N1
16768 if (N0CFP && N0CFP->isZero()) {
16769 if (N0CFP->isNegative() ||
16770 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16771 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16772 // flushed to zero, unless all users treat denorms as zero (DAZ).
16773 // FIXME: This transform will change the sign of a NaN and the behavior
16774 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16775 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16776 if (DenormMode == DenormalMode::getIEEE()) {
16777 if (SDValue NegN1 =
16778 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16779 return NegN1;
16780 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16781 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16782 }
16783 }
16784 }
16785
16786 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16787 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16788 N1.getOpcode() == ISD::FADD) {
16789 // X - (X + Y) -> -Y
16790 if (N0 == N1->getOperand(0))
16791 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16792 // X - (Y + X) -> -Y
16793 if (N0 == N1->getOperand(1))
16794 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16795 }
16796
16797 // fold (fsub A, (fneg B)) -> (fadd A, B)
16798 if (SDValue NegN1 =
16799 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16800 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16801
16802 // FSUB -> FMA combines:
16803 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16804 AddToWorklist(Fused.getNode());
16805 return Fused;
16806 }
16807
16808 return SDValue();
16809}
16810
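Editor's note (illustrative only): visitFSUB above replaces a subtraction from negative zero with a plain negation when the denormal mode is IEEE. A minimal sketch of the identity:

#include <cstdio>

int main() {
  float x = 2.5f;
  // Subtracting from -0.0 produces the exact negation for IEEE inputs; the
  // combine additionally checks the denormal mode because flushing denormals
  // to zero would make the two forms differ for tiny values.
  std::printf("%d\n", (-0.0f - x) == -x);
  return 0;
}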
16811// Transform IEEE Floats:
16812// (fmul C, (uitofp Pow2))
16813// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16814// (fdiv C, (uitofp Pow2))
16815// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16816//
16817 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
16818// there is no need for more than an add/sub.
16819//
16820// This is valid under the following circumstances:
16821// 1) We are dealing with IEEE floats
16822// 2) C is normal
16823// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16824 // TODO: Much of this could also be used for generating `ldexp` on targets
16825 // that prefer it.
16826SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16827 EVT VT = N->getValueType(0);
16828 SDValue ConstOp, Pow2Op;
16829
16830 std::optional<int> Mantissa;
16831 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16832 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16833 return false;
16834
16835 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16836 Pow2Op = N->getOperand(1 - ConstOpIdx);
16837 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16838 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16839 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16840 return false;
16841
16842 Pow2Op = Pow2Op.getOperand(0);
16843
16844 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16845 // TODO: We could use knownbits to make this bound more precise.
16846 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16847
16848 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16849 if (CFP == nullptr)
16850 return false;
16851
16852 const APFloat &APF = CFP->getValueAPF();
16853
16854 // Make sure we have normal/ieee constant.
16855 if (!APF.isNormal() || !APF.isIEEE())
16856 return false;
16857
16858 // Make sure the float's exponent is within the bounds for which this
16859 // transform produces a bitwise-equal value.
16860 int CurExp = ilogb(APF);
16861 // FMul by pow2 will only increase exponent.
16862 int MinExp =
16863 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16864 // FDiv by pow2 will only decrease exponent.
16865 int MaxExp =
16866 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16867 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16868 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16869 return false;
16870
16871 // Finally make sure we actually know the mantissa for the float type.
16872 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16873 if (!Mantissa)
16874 Mantissa = ThisMantissa;
16875
16876 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16877 };
16878
16879 // TODO: We may be able to include undefs.
16880 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16881 };
16882
16883 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16884 return SDValue();
16885
16886 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16887 return SDValue();
16888
16889 // Get log2 after all other checks have taken place. This is because
16890 // BuildLogBase2 may create a new node.
16891 SDLoc DL(N);
16892 // Get Log2 type with same bitwidth as the float type (VT).
16893 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16894 if (VT.isVector())
16895 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16896 VT.getVectorElementCount());
16897
16898 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16899 /*InexpensiveOnly*/ true, NewIntVT);
16900 if (!Log2)
16901 return SDValue();
16902
16903 // Perform actual transform.
16904 SDValue MantissaShiftCnt =
16905 DAG.getShiftAmountConstant(*Mantissa, NewIntVT, DL);
16906 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16907 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16908 // cast. We could implement a handler here to deal with the casts.
16909 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16910 SDValue ResAsInt =
16911 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16912 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16913 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16914 return ResAsFP;
16915}
16916
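Editor's note (illustrative only): the transform above exploits the fact that multiplying a normal IEEE float by 2^k only bumps the exponent field, so it can be done with an integer add of k << mantissa-bits on the bit pattern. A self-contained sketch using float (23 explicit mantissa bits):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float c = 3.7f;            // a normal constant, as the combine requires
  int k = 5;                 // multiply by 2^k = 32
  uint32_t bits;
  std::memcpy(&bits, &c, sizeof(bits));
  bits += static_cast<uint32_t>(k) << 23;  // add Log2(Pow2) << mantissa
  float viaAdd;
  std::memcpy(&viaAdd, &bits, sizeof(viaAdd));
  // Bitwise-equal to the real multiply while the exponent stays in range.
  std::printf("%d\n", viaAdd == c * 32.0f);
  return 0;
}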
16917SDValue DAGCombiner::visitFMUL(SDNode *N) {
16918 SDValue N0 = N->getOperand(0);
16919 SDValue N1 = N->getOperand(1);
16920 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16921 EVT VT = N->getValueType(0);
16922 SDLoc DL(N);
16923 const TargetOptions &Options = DAG.getTarget().Options;
16924 const SDNodeFlags Flags = N->getFlags();
16925 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16926
16927 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16928 return R;
16929
16930 // fold (fmul c1, c2) -> c1*c2
16931 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16932 return C;
16933
16934 // canonicalize constant to RHS
16935 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16936 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16937 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16938
16939 // fold vector ops
16940 if (VT.isVector())
16941 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16942 return FoldedVOp;
16943
16944 if (SDValue NewSel = foldBinOpIntoSelect(N))
16945 return NewSel;
16946
16947 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16948 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16949 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16950 N0.getOpcode() == ISD::FMUL) {
16951 SDValue N00 = N0.getOperand(0);
16952 SDValue N01 = N0.getOperand(1);
16953 // Avoid an infinite loop by making sure that N00 is not a constant
16954 // (the inner multiply has not been constant folded yet).
16955 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16956 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16957 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16958 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16959 }
16960 }
16961
16962 // Match a special-case: we convert X * 2.0 into fadd.
16963 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16964 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16965 N0.getOperand(0) == N0.getOperand(1)) {
16966 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16967 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16968 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16969 }
16970
16971 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16972 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16973 VT, N0, N1, Flags))
16974 return SD;
16975 }
16976
16977 // fold (fmul X, 2.0) -> (fadd X, X)
16978 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16979 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16980
16981 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16982 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16983 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16984 return DAG.getNode(ISD::FSUB, DL, VT,
16985 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16986 }
16987 }
16988
16989 // -N0 * -N1 --> N0 * N1
16990 TargetLowering::NegatibleCost CostN0 =
16991 TargetLowering::NegatibleCost::Expensive;
16992 TargetLowering::NegatibleCost CostN1 =
16993 TargetLowering::NegatibleCost::Expensive;
16994 SDValue NegN0 =
16995 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16996 if (NegN0) {
16997 HandleSDNode NegN0Handle(NegN0);
16998 SDValue NegN1 =
16999 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17000 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17001 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17002 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
17003 }
17004
17005 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
17006 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
17007 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
17008 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
17009 TLI.isOperationLegal(ISD::FABS, VT)) {
17010 SDValue Select = N0, X = N1;
17011 if (Select.getOpcode() != ISD::SELECT)
17012 std::swap(Select, X);
17013
17014 SDValue Cond = Select.getOperand(0);
17015 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
17016 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
17017
17018 if (TrueOpnd && FalseOpnd &&
17019 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
17020 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
17021 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
17022 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17023 switch (CC) {
17024 default: break;
17025 case ISD::SETOLT:
17026 case ISD::SETULT:
17027 case ISD::SETOLE:
17028 case ISD::SETULE:
17029 case ISD::SETLT:
17030 case ISD::SETLE:
17031 std::swap(TrueOpnd, FalseOpnd);
17032 [[fallthrough]];
17033 case ISD::SETOGT:
17034 case ISD::SETUGT:
17035 case ISD::SETOGE:
17036 case ISD::SETUGE:
17037 case ISD::SETGT:
17038 case ISD::SETGE:
17039 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
17040 TLI.isOperationLegal(ISD::FNEG, VT))
17041 return DAG.getNode(ISD::FNEG, DL, VT,
17042 DAG.getNode(ISD::FABS, DL, VT, X));
17043 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
17044 return DAG.getNode(ISD::FABS, DL, VT, X);
17045
17046 break;
17047 }
17048 }
17049 }
17050
17051 // FMUL -> FMA combines:
17052 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
17053 AddToWorklist(Fused.getNode());
17054 return Fused;
17055 }
17056
17057 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
17058 // able to run.
17059 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17060 return R;
17061
17062 return SDValue();
17063}
17064
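Editor's note (illustrative only): two of the unconditional folds above, fmul X, 2.0 -> fadd X, X and fmul X, -1.0 -> fsub -0.0, X, are exact for IEEE arithmetic. A small sketch:

#include <cstdio>

int main() {
  float x = 3.14159f;
  std::printf("%d %d\n",
              x * 2.0f == x + x,        // doubling is exact (or overflows to inf in both forms)
              x * -1.0f == -0.0f - x);  // multiplying by -1.0 just flips the sign
  return 0;
}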
17065template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
17066 SDValue N0 = N->getOperand(0);
17067 SDValue N1 = N->getOperand(1);
17068 SDValue N2 = N->getOperand(2);
17069 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
17070 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
17071 EVT VT = N->getValueType(0);
17072 SDLoc DL(N);
17073 const TargetOptions &Options = DAG.getTarget().Options;
17074 // FMA nodes have flags that propagate to the created nodes.
17075 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17076 MatchContextClass matcher(DAG, TLI, N);
17077
17078 // Constant fold FMA.
17079 if (isa<ConstantFPSDNode>(N0) &&
17080 isa<ConstantFPSDNode>(N1) &&
17081 isa<ConstantFPSDNode>(N2)) {
17082 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
17083 }
17084
17085 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17086 TargetLowering::NegatibleCost CostN0 =
17087 TargetLowering::NegatibleCost::Expensive;
17088 TargetLowering::NegatibleCost CostN1 =
17089 TargetLowering::NegatibleCost::Expensive;
17090 SDValue NegN0 =
17091 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17092 if (NegN0) {
17093 HandleSDNode NegN0Handle(NegN0);
17094 SDValue NegN1 =
17095 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17096 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17097 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17098 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17099 }
17100
17101 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17102 if (Options.UnsafeFPMath) {
17103 if (N0CFP && N0CFP->isZero())
17104 return N2;
17105 if (N1CFP && N1CFP->isZero())
17106 return N2;
17107 }
17108
17109 // FIXME: Support splat of constant.
17110 if (N0CFP && N0CFP->isExactlyValue(1.0))
17111 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
17112 if (N1CFP && N1CFP->isExactlyValue(1.0))
17113 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
17114
17115 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17116 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17117 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17118 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
17119
17120 bool CanReassociate =
17121 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17122 if (CanReassociate) {
17123 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17124 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17125 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17126 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17127 return matcher.getNode(
17128 ISD::FMUL, DL, VT, N0,
17129 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17130 }
17131
17132 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17133 if (matcher.match(N0, ISD::FMUL) &&
17134 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17135 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17136 return matcher.getNode(
17137 ISD::FMA, DL, VT, N0.getOperand(0),
17138 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17139 }
17140 }
17141
17142 // (fma x, -1, y) -> (fadd (fneg x), y)
17143 // FIXME: Support splat of constant.
17144 if (N1CFP) {
17145 if (N1CFP->isExactlyValue(1.0))
17146 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17147
17148 if (N1CFP->isExactlyValue(-1.0) &&
17149 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17150 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17151 AddToWorklist(RHSNeg.getNode());
17152 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17153 }
17154
17155 // fma (fneg x), K, y -> fma x, -K, y
17156 if (matcher.match(N0, ISD::FNEG) &&
17157 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17158 (N1.hasOneUse() &&
17159 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17160 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17161 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17162 }
17163 }
17164
17165 // FIXME: Support splat of constant.
17166 if (CanReassociate) {
17167 // (fma x, c, x) -> (fmul x, (c+1))
17168 if (N1CFP && N0 == N2) {
17169 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17170 matcher.getNode(ISD::FADD, DL, VT, N1,
17171 DAG.getConstantFP(1.0, DL, VT)));
17172 }
17173
17174 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17175 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17176 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17177 matcher.getNode(ISD::FADD, DL, VT, N1,
17178 DAG.getConstantFP(-1.0, DL, VT)));
17179 }
17180 }
17181
17182 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17183 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17184 if (!TLI.isFNegFree(VT))
17185 if (SDValue Neg = TLI.getCheaperNegatedExpression(
17186 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17187 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17188 return SDValue();
17189}
17190
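Editor's note (illustrative only): the reassociation folds in visitFMA above use the identity fma(x, c1, x*c2) == x*(c1+c2), which holds algebraically but not bit-for-bit, hence the reassoc/unsafe-math gate. A sketch:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.7, c1 = 3.0, c2 = 5.0;
  double original = std::fma(x, c1, x * c2); // (fma x, c1, (fmul x, c2))
  double folded = x * (c1 + c2);             // (fmul x, c1+c2)
  std::printf("%.17g %.17g\n", original, folded);
  return 0;
}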
17191SDValue DAGCombiner::visitFMAD(SDNode *N) {
17192 SDValue N0 = N->getOperand(0);
17193 SDValue N1 = N->getOperand(1);
17194 SDValue N2 = N->getOperand(2);
17195 EVT VT = N->getValueType(0);
17196 SDLoc DL(N);
17197
17198 // Constant fold FMAD.
17199 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17200 isa<ConstantFPSDNode>(N2))
17201 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17202
17203 return SDValue();
17204}
17205
17206// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17207// reciprocal.
17208// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17209// Notice that this is not always beneficial. One reason is different targets
17210// may have different costs for FDIV and FMUL, so sometimes the cost of two
17211// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
17212// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
17213SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17214 // TODO: Limit this transform based on optsize/minsize - it always creates at
17215 // least 1 extra instruction. But the perf win may be substantial enough
17216 // that only minsize should restrict this.
17217 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17218 const SDNodeFlags Flags = N->getFlags();
17219 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17220 return SDValue();
17221
17222 // Skip if current node is a reciprocal/fneg-reciprocal.
17223 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17224 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17225 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17226 return SDValue();
17227
17228 // Exit early if the target does not want this transform or if there can't
17229 // possibly be enough uses of the divisor to make the transform worthwhile.
17230 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17231
17232 // For splat vectors, scale the number of uses by the splat factor. If we can
17233 // convert the division into a scalar op, that will likely be much faster.
17234 unsigned NumElts = 1;
17235 EVT VT = N->getValueType(0);
17236 if (VT.isVector() && DAG.isSplatValue(N1))
17237 NumElts = VT.getVectorMinNumElements();
17238
17239 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17240 return SDValue();
17241
17242 // Find all FDIV users of the same divisor.
17243 // Use a set because duplicates may be present in the user list.
17244 SetVector<SDNode *> Users;
17245 for (auto *U : N1->uses()) {
17246 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17247 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17248 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17249 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17250 U->getFlags().hasAllowReassociation() &&
17251 U->getFlags().hasNoSignedZeros())
17252 continue;
17253
17254 // This division is eligible for optimization only if global unsafe math
17255 // is enabled or if this division allows reciprocal formation.
17256 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17257 Users.insert(U);
17258 }
17259 }
17260
17261 // Now that we have the actual number of divisor uses, make sure it meets
17262 // the minimum threshold specified by the target.
17263 if ((Users.size() * NumElts) < MinUses)
17264 return SDValue();
17265
17266 SDLoc DL(N);
17267 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17268 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17269
17270 // Dividend / Divisor -> Dividend * Reciprocal
17271 for (auto *U : Users) {
17272 SDValue Dividend = U->getOperand(0);
17273 if (Dividend != FPOne) {
17274 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17275 Reciprocal, Flags);
17276 CombineTo(U, NewNode);
17277 } else if (U != Reciprocal.getNode()) {
17278 // In the absence of fast-math-flags, this user node is always the
17279 // same node as Reciprocal, but with FMF they may be different nodes.
17280 CombineTo(U, Reciprocal);
17281 }
17282 }
17283 return SDValue(N, 0); // N was replaced.
17284}
17285
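Editor's note (illustrative only): the transform above computes one reciprocal and reuses it for every division by the same divisor. A minimal sketch of the rewrite and of why it needs the allow-reciprocal (arcp) or unsafe-math flags:

#include <cstdio>

int main() {
  double a = 3.0, b = 5.0, d = 7.0;
  double recip = 1.0 / d;      // single FDIV shared by all users
  // a * recip and a / d can differ in the last bit because two roundings
  // replace one, which is why reciprocal formation must be explicitly allowed.
  std::printf("%.17g %.17g\n", a / d, a * recip);
  std::printf("%.17g %.17g\n", b / d, b * recip);
  return 0;
}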
17286SDValue DAGCombiner::visitFDIV(SDNode *N) {
17287 SDValue N0 = N->getOperand(0);
17288 SDValue N1 = N->getOperand(1);
17289 EVT VT = N->getValueType(0);
17290 SDLoc DL(N);
17291 const TargetOptions &Options = DAG.getTarget().Options;
17292 SDNodeFlags Flags = N->getFlags();
17293 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17294
17295 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17296 return R;
17297
17298 // fold (fdiv c1, c2) -> c1/c2
17299 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17300 return C;
17301
17302 // fold vector ops
17303 if (VT.isVector())
17304 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17305 return FoldedVOp;
17306
17307 if (SDValue NewSel = foldBinOpIntoSelect(N))
17308 return NewSel;
17309
17310 if (SDValue V = combineRepeatedFPDivisors(N))
17311 return V;
17312
17313 // fold (fdiv X, c2) -> (fmul X, 1/c2) if there is no loss in precision, or
17314 // the loss is acceptable with AllowReciprocal.
17315 if (auto *N1CFP = isConstOrConstSplatFP(N1, true)) {
17316 // Compute the reciprocal 1.0 / c2.
17317 const APFloat &N1APF = N1CFP->getValueAPF();
17318 APFloat Recip = APFloat::getOne(N1APF.getSemantics());
17319 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17320 // Only do the transform if the reciprocal is a legal fp immediate that
17321 // isn't too nasty (eg NaN, denormal, ...).
17322 if (((st == APFloat::opOK && !Recip.isDenormal()) ||
17323 (st == APFloat::opInexact &&
17324 (Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
17325 (!LegalOperations ||
17326 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17327 // backend)... we should handle this gracefully after Legalize.
17328 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17329 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17330 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17331 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17332 DAG.getConstantFP(Recip, DL, VT));
17333 }
17334
17335 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17336 // If this FDIV is part of a reciprocal square root, it may be folded
17337 // into a target-specific square root estimate instruction.
17338 if (N1.getOpcode() == ISD::FSQRT) {
17339 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17340 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17341 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17342 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17343 if (SDValue RV =
17344 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17345 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17346 AddToWorklist(RV.getNode());
17347 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17348 }
17349 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17350 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17351 if (SDValue RV =
17352 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17353 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17354 AddToWorklist(RV.getNode());
17355 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17356 }
17357 } else if (N1.getOpcode() == ISD::FMUL) {
17358 // Look through an FMUL. Even though this won't remove the FDIV directly,
17359 // it's still worthwhile to get rid of the FSQRT if possible.
17360 SDValue Sqrt, Y;
17361 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17362 Sqrt = N1.getOperand(0);
17363 Y = N1.getOperand(1);
17364 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17365 Sqrt = N1.getOperand(1);
17366 Y = N1.getOperand(0);
17367 }
17368 if (Sqrt.getNode()) {
17369 // If the other multiply operand is known positive, pull it into the
17370 // sqrt. That will eliminate the division if we convert to an estimate.
17371 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17372 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17373 SDValue A;
17374 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17375 A = Y.getOperand(0);
17376 else if (Y == Sqrt.getOperand(0))
17377 A = Y;
17378 if (A) {
17379 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17380 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17381 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17382 SDValue AAZ =
17383 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17384 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17385 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17386
17387 // Estimate creation failed. Clean up speculatively created nodes.
17388 recursivelyDeleteUnusedNodes(AAZ.getNode());
17389 }
17390 }
17391
17392 // We found a FSQRT, so try to make this fold:
17393 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17394 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17395 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17396 AddToWorklist(Div.getNode());
17397 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17398 }
17399 }
17400 }
17401
17402 // Fold into a reciprocal estimate and multiply instead of a real divide.
17403 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17404 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17405 return RV;
17406 }
17407
17408 // Fold X/Sqrt(X) -> Sqrt(X)
17409 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17410 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17411 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17412 return N1;
17413
17414 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17415 TargetLowering::NegatibleCost CostN0 =
17416 TargetLowering::NegatibleCost::Expensive;
17417 TargetLowering::NegatibleCost CostN1 =
17418 TargetLowering::NegatibleCost::Expensive;
17419 SDValue NegN0 =
17420 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17421 if (NegN0) {
17422 HandleSDNode NegN0Handle(NegN0);
17423 SDValue NegN1 =
17424 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17425 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17426 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17427 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17428 }
17429
17430 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17431 return R;
17432
17433 return SDValue();
17434}
17435
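Editor's note (illustrative only): one of the folds above rewrites X / sqrt(X) as sqrt(X). The identity is exact over the reals but picks up an extra rounding in FP, hence the reassociation and no-signed-zeros requirements. A sketch:

#include <cmath>
#include <cstdio>

int main() {
  double x = 2.0;
  std::printf("%.17g %.17g\n", x / std::sqrt(x), std::sqrt(x));
  return 0;
}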
17436SDValue DAGCombiner::visitFREM(SDNode *N) {
17437 SDValue N0 = N->getOperand(0);
17438 SDValue N1 = N->getOperand(1);
17439 EVT VT = N->getValueType(0);
17440 SDNodeFlags Flags = N->getFlags();
17441 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17442 SDLoc DL(N);
17443
17444 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17445 return R;
17446
17447 // fold (frem c1, c2) -> fmod(c1,c2)
17448 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17449 return C;
17450
17451 if (SDValue NewSel = foldBinOpIntoSelect(N))
17452 return NewSel;
17453
17454 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
17455 // power of 2.
17456 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17460 DAG.isKnownToBeAPowerOfTwoFP(N1)) {
17461 bool NeedsCopySign =
17462 !Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
17463 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17464 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17465 SDValue MLA;
17466 if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
17467 MLA = DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17468 N1, N0);
17469 } else {
17470 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17471 MLA = DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17472 }
17473 return NeedsCopySign ? DAG.getNode(ISD::FCOPYSIGN, DL, VT, MLA, N0) : MLA;
17474 }
17475
17476 return SDValue();
17477}
17478
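Editor's note (illustrative only): the frem lowering above computes x - trunc(x / y) * y. For y an exact power of two the division, truncation and multiply are all exact, so the result matches fmod. A sketch:

#include <cmath>
#include <cstdio>

int main() {
  float x = 7.3f, y = 2.0f;                   // y is a power of two
  float lowered = x - std::trunc(x / y) * y;  // x - trunc(x/y) * y
  std::printf("%.9g %.9g\n", std::fmod(x, y), lowered);
  return 0;
}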
17479SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17480 SDNodeFlags Flags = N->getFlags();
17481 const TargetOptions &Options = DAG.getTarget().Options;
17482
17483 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17484 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17485 if (!Flags.hasApproximateFuncs() ||
17486 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17487 return SDValue();
17488
17489 SDValue N0 = N->getOperand(0);
17490 if (TLI.isFsqrtCheap(N0, DAG))
17491 return SDValue();
17492
17493 // FSQRT nodes have flags that propagate to the created nodes.
17494 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17495 // transform the fdiv, we may produce a sub-optimal estimate sequence
17496 // because the reciprocal calculation may not have to filter out a
17497 // 0.0 input.
17498 return buildSqrtEstimate(N0, Flags);
17499}
17500
17501/// copysign(x, fp_extend(y)) -> copysign(x, y)
17502/// copysign(x, fp_round(y)) -> copysign(x, y)
17503/// Operands to the functions are the type of X and Y respectively.
17504static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17505 // Always fold no-op FP casts.
17506 if (XTy == YTy)
17507 return true;
17508
17509 // Do not optimize out type conversion of f128 type yet.
17510 // For some targets like x86_64, configuration is changed to keep one f128
17511 // value in one SSE register, but instruction selection cannot handle
17512 // FCOPYSIGN on SSE registers yet.
17513 if (YTy == MVT::f128)
17514 return false;
17515
17517}
17518
17519 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17520 SDValue N1 = N->getOperand(1);
17521 if (N1.getOpcode() != ISD::FP_EXTEND &&
17522 N1.getOpcode() != ISD::FP_ROUND)
17523 return false;
17524 EVT N1VT = N1->getValueType(0);
17525 EVT N1Op0VT = N1->getOperand(0).getValueType();
17526 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17527}
17528
17529SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17530 SDValue N0 = N->getOperand(0);
17531 SDValue N1 = N->getOperand(1);
17532 EVT VT = N->getValueType(0);
17533 SDLoc DL(N);
17534
17535 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17536 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, DL, VT, {N0, N1}))
17537 return C;
17538
17539 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17540 const APFloat &V = N1C->getValueAPF();
17541 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17542 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17543 if (!V.isNegative()) {
17544 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17545 return DAG.getNode(ISD::FABS, DL, VT, N0);
17546 } else {
17547 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17548 return DAG.getNode(ISD::FNEG, DL, VT,
17549 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17550 }
17551 }
17552
17553 // copysign(fabs(x), y) -> copysign(x, y)
17554 // copysign(fneg(x), y) -> copysign(x, y)
17555 // copysign(copysign(x,z), y) -> copysign(x, y)
17556 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17557 N0.getOpcode() == ISD::FCOPYSIGN)
17558 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0.getOperand(0), N1);
17559
17560 // copysign(x, abs(y)) -> abs(x)
17561 if (N1.getOpcode() == ISD::FABS)
17562 return DAG.getNode(ISD::FABS, DL, VT, N0);
17563
17564 // copysign(x, copysign(y,z)) -> copysign(x, z)
17565 if (N1.getOpcode() == ISD::FCOPYSIGN)
17566 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(1));
17567
17568 // copysign(x, fp_extend(y)) -> copysign(x, y)
17569 // copysign(x, fp_round(y)) -> copysign(x, y)
17570 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17571 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N0, N1.getOperand(0));
17572
17573 // We only take the sign bit from the sign operand.
17574 EVT SignVT = N1.getValueType();
17575 if (SimplifyDemandedBits(N1,
17576 APInt::getSignMask(SignVT.getScalarSizeInBits())))
17577 return SDValue(N, 0);
17578
17579 // We only take the non-sign bits from the value operand
17580 if (SimplifyDemandedBits(N0,
17581 APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
17582 return SDValue(N, 0);
17583
17584 return SDValue();
17585}
17586
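Editor's note (illustrative only): the constant-sign folds above reduce copysign(x, c) to fabs(x) or fneg(fabs(x)) depending on the sign of c. A sketch of the identities:

#include <cmath>
#include <cstdio>

int main() {
  double x = -4.25;
  std::printf("%d %d\n",
              std::copysign(x, 2.0) == std::fabs(x),    // positive sign source -> fabs
              std::copysign(x, -2.0) == -std::fabs(x)); // negative sign source -> fneg(fabs)
  return 0;
}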
17587SDValue DAGCombiner::visitFPOW(SDNode *N) {
17588 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17589 if (!ExponentC)
17590 return SDValue();
17591 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17592
17593 // Try to convert x ** (1/3) into cube root.
17594 // TODO: Handle the various flavors of long double.
17595 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17596 // Some range near 1/3 should be fine.
17597 EVT VT = N->getValueType(0);
17598 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17599 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17600 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17601 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17602 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
17603 // For regular numbers, rounding may cause the results to differ.
17604 // Therefore, we require { nsz ninf nnan afn } for this transform.
17605 // TODO: We could select out the special cases if we don't have nsz/ninf.
17606 SDNodeFlags Flags = N->getFlags();
17607 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17608 !Flags.hasApproximateFuncs())
17609 return SDValue();
17610
17611 // Do not create a cbrt() libcall if the target does not have it, and do not
17612 // turn a pow that has lowering support into a cbrt() libcall.
17613 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17614 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17615 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17616 return SDValue();
17617
17618 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17619 }
17620
17621 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17622 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17623 // TODO: This could be extended (using a target hook) to handle smaller
17624 // power-of-2 fractional exponents.
17625 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17626 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17627 if (ExponentIs025 || ExponentIs075) {
17628 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17629 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17630 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17631 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17632 // For regular numbers, rounding may cause the results to differ.
17633 // Therefore, we require { nsz ninf afn } for this transform.
17634 // TODO: We could select out the special cases if we don't have nsz/ninf.
17635 SDNodeFlags Flags = N->getFlags();
17636
17637 // We only need no signed zeros for the 0.25 case.
17638 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17639 !Flags.hasApproximateFuncs())
17640 return SDValue();
17641
17642 // Don't double the number of libcalls. We are trying to inline fast code.
17643 if (!TLI.isOperationLegalOrCustom(ISD::FSQRT, VT))
17644 return SDValue();
17645
17646 // Assume that libcalls are the smallest code.
17647 // TODO: This restriction should probably be lifted for vectors.
17648 if (ForCodeSize)
17649 return SDValue();
17650
17651 // pow(X, 0.25) --> sqrt(sqrt(X))
17652 SDLoc DL(N);
17653 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17654 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17655 if (ExponentIs025)
17656 return SqrtSqrt;
17657 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17658 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17659 }
17660
17661 return SDValue();
17662}
17663
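Editor's note (illustrative only): visitFPOW above expands pow(x, 0.25) into sqrt(sqrt(x)) and pow(x, 0.75) into sqrt(x) * sqrt(sqrt(x)). The forms agree mathematically; rounding can differ, which is why afn/ninf (and nsz for the 0.25 case) are required. A sketch:

#include <cmath>
#include <cstdio>

int main() {
  double x = 19.0;
  double s = std::sqrt(x), ss = std::sqrt(s);
  std::printf("%.17g %.17g\n", std::pow(x, 0.25), ss);      // pow(x, 0.25) vs sqrt(sqrt(x))
  std::printf("%.17g %.17g\n", std::pow(x, 0.75), s * ss);  // pow(x, 0.75) vs sqrt(x)*sqrt(sqrt(x))
  return 0;
}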
17664 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17665 const TargetLowering &TLI) {
17666 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17667 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17668 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17669 // conversions would return +0.0.
17670 // FIXME: We should be able to use node-level FMF here.
17671 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17672 EVT VT = N->getValueType(0);
17673 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17674 !DAG.getTarget().Options.NoSignedZerosFPMath)
17675 return SDValue();
17676
17677 // fptosi/fptoui round towards zero, so converting from FP to integer and
17678 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17679 SDValue N0 = N->getOperand(0);
17680 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17681 N0.getOperand(0).getValueType() == VT)
17682 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17683
17684 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17685 N0.getOperand(0).getValueType() == VT)
17686 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17687
17688 return SDValue();
17689}
17690
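Editor's note (illustrative only): the helper above relies on fptosi/fptoui rounding toward zero, so an in-range round trip through the integer type behaves like ftrunc. A sketch (out-of-range inputs would be UB, which is exactly what the fold relies on):

#include <cmath>
#include <cstdio>

int main() {
  float x = -3.75f;
  float roundTrip = static_cast<float>(static_cast<int>(x)); // sitofp (fptosi x)
  std::printf("%d\n", roundTrip == std::trunc(x));           // behaves like ftrunc for in-range x
  return 0;
}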
17691SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17692 SDValue N0 = N->getOperand(0);
17693 EVT VT = N->getValueType(0);
17694 EVT OpVT = N0.getValueType();
17695
17696 // [us]itofp(undef) = 0, because the result value is bounded.
17697 if (N0.isUndef())
17698 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17699
17700 // fold (sint_to_fp c1) -> c1fp
17701 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17702 // ...but only if the target supports immediate floating-point values
17703 (!LegalOperations ||
17704 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17705 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17706
17707 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17708 // but UINT_TO_FP is legal on this target, try to convert.
17709 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17710 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17711 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17712 if (DAG.SignBitIsZero(N0))
17713 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17714 }
17715
17716 // The next optimizations are desirable only if SELECT_CC can be lowered.
17717 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17718 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17719 !VT.isVector() &&
17720 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17721 SDLoc DL(N);
17722 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17723 DAG.getConstantFP(0.0, DL, VT));
17724 }
17725
17726 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17727 // (select (setcc x, y, cc), 1.0, 0.0)
17728 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17729 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17730 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17731 SDLoc DL(N);
17732 return DAG.getSelect(DL, VT, N0.getOperand(0),
17733 DAG.getConstantFP(1.0, DL, VT),
17734 DAG.getConstantFP(0.0, DL, VT));
17735 }
17736
17737 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17738 return FTrunc;
17739
17740 return SDValue();
17741}
17742
17743SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17744 SDValue N0 = N->getOperand(0);
17745 EVT VT = N->getValueType(0);
17746 EVT OpVT = N0.getValueType();
17747
17748 // [us]itofp(undef) = 0, because the result value is bounded.
17749 if (N0.isUndef())
17750 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17751
17752 // fold (uint_to_fp c1) -> c1fp
17753 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17754 // ...but only if the target supports immediate floating-point values
17755 (!LegalOperations ||
17756 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17757 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17758
17759 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17760 // but SINT_TO_FP is legal on this target, try to convert.
17761 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17762 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17763 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17764 if (DAG.SignBitIsZero(N0))
17765 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17766 }
17767
17768 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17769 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17770 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17771 SDLoc DL(N);
17772 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17773 DAG.getConstantFP(0.0, DL, VT));
17774 }
17775
17776 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17777 return FTrunc;
17778
17779 return SDValue();
17780}
17781
17782 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17783 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17784 SDValue N0 = N->getOperand(0);
17785 EVT VT = N->getValueType(0);
17786
17787 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17788 return SDValue();
17789
17790 SDValue Src = N0.getOperand(0);
17791 EVT SrcVT = Src.getValueType();
17792 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17793 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17794
17795 // We can safely assume the conversion won't overflow the output range,
17796 // because (for example) (uint8_t)18293.f is undefined behavior.
17797
17798 // Since we can assume the conversion won't overflow, our decision as to
17799 // whether the input will fit in the float should depend on the minimum
17800 // of the input range and output range.
17801
17802 // This means this is also safe for a signed input and unsigned output, since
17803 // a negative input would lead to undefined behavior.
17804 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17805 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17806 unsigned ActualSize = std::min(InputSize, OutputSize);
17807 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17808
17809 // We can only fold away the float conversion if the input range can be
17810 // represented exactly in the float range.
17811 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17812 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17813 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17814 : ISD::ZERO_EXTEND;
17815 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17816 }
17817 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17818 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17819 return DAG.getBitcast(VT, Src);
17820 }
17821 return SDValue();
17822}
17823
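Editor's note (illustrative only): FoldIntToFPToInt above only removes the FP detour when the float's significand can represent the whole integer range, per the semanticsPrecision check. A sketch of why that bound matters for float (24-bit significand):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t small = (1u << 24) - 1; // fits in 24 significand bits: round trip is exact
  uint32_t big = (1u << 24) + 1;   // does not fit: the float rounds to a neighbour
  std::printf("%u %u\n",
              static_cast<uint32_t>(static_cast<float>(small)),
              static_cast<uint32_t>(static_cast<float>(big)));
  return 0;
}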
17824SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17825 SDValue N0 = N->getOperand(0);
17826 EVT VT = N->getValueType(0);
17827
17828 // fold (fp_to_sint undef) -> undef
17829 if (N0.isUndef())
17830 return DAG.getUNDEF(VT);
17831
17832 // fold (fp_to_sint c1fp) -> c1
17833 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17834 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17835
17836 return FoldIntToFPToInt(N, DAG);
17837}
17838
17839SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17840 SDValue N0 = N->getOperand(0);
17841 EVT VT = N->getValueType(0);
17842
17843 // fold (fp_to_uint undef) -> undef
17844 if (N0.isUndef())
17845 return DAG.getUNDEF(VT);
17846
17847 // fold (fp_to_uint c1fp) -> c1
17848 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17849 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17850
17851 return FoldIntToFPToInt(N, DAG);
17852}
17853
17854SDValue DAGCombiner::visitXRINT(SDNode *N) {
17855 SDValue N0 = N->getOperand(0);
17856 EVT VT = N->getValueType(0);
17857
17858 // fold (lrint|llrint undef) -> undef
17859 if (N0.isUndef())
17860 return DAG.getUNDEF(VT);
17861
17862 // fold (lrint|llrint c1fp) -> c1
17863 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17864 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17865
17866 return SDValue();
17867}
17868
17869SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17870 SDValue N0 = N->getOperand(0);
17871 SDValue N1 = N->getOperand(1);
17872 EVT VT = N->getValueType(0);
17873
17874 // fold (fp_round c1fp) -> c1fp
17875 if (SDValue C =
17876 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17877 return C;
17878
17879 // fold (fp_round (fp_extend x)) -> x
17880 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17881 return N0.getOperand(0);
17882
17883 // fold (fp_round (fp_round x)) -> (fp_round x)
17884 if (N0.getOpcode() == ISD::FP_ROUND) {
17885 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17886 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17887
17888 // Avoid folding legal fp_rounds into non-legal ones.
17889 if (!hasOperation(ISD::FP_ROUND, VT))
17890 return SDValue();
17891
17892 // Skip this folding if it results in an fp_round from f80 to f16.
17893 //
17894 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17895 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17896 // instructions from f32 or f64. Moreover, the first (value-preserving)
17897 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17898 // x86.
17899 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17900 return SDValue();
17901
17902 // If the first fp_round isn't a value preserving truncation, it might
17903 // introduce a tie in the second fp_round, that wouldn't occur in the
17904 // single-step fp_round we want to fold to.
17905 // In other words, double rounding isn't the same as rounding.
17906 // Also, this is a value preserving truncation iff both fp_round's are.
17907 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17908 SDLoc DL(N);
17909 return DAG.getNode(
17910 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17911 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17912 }
17913 }
17914
17915 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17916 // Note: From a legality perspective, this is a two step transform. First,
17917 // we duplicate the fp_round to the arguments of the copysign, then we
17918 // eliminate the fp_round on Y. The second step requires an additional
17919 // predicate to match the implementation above.
17920 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17921 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17922 N0.getValueType())) {
17923 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17924 N0.getOperand(0), N1);
17925 AddToWorklist(Tmp.getNode());
17926 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17927 Tmp, N0.getOperand(1));
17928 }
17929
17930 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17931 return NewVSel;
17932
17933 return SDValue();
17934}
17935
17936SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17937 SDValue N0 = N->getOperand(0);
17938 EVT VT = N->getValueType(0);
17939
17940 if (VT.isVector())
17941 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17942 return FoldedVOp;
17943
17944 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17945 if (N->hasOneUse() &&
17946 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17947 return SDValue();
17948
17949 // fold (fp_extend c1fp) -> c1fp
17950 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17951 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17952
17953 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17954 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17955 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17956 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17957
17958 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17959 // value of X.
17960 if (N0.getOpcode() == ISD::FP_ROUND
17961 && N0.getConstantOperandVal(1) == 1) {
17962 SDValue In = N0.getOperand(0);
17963 if (In.getValueType() == VT) return In;
17964 if (VT.bitsLT(In.getValueType()))
17965 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17966 In, N0.getOperand(1));
17967 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17968 }
17969
17970 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17971 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17972 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17973 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17974 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17975 LN0->getChain(),
17976 LN0->getBasePtr(), N0.getValueType(),
17977 LN0->getMemOperand());
17978 CombineTo(N, ExtLoad);
17979 CombineTo(
17980 N0.getNode(),
17981 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17982 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17983 ExtLoad.getValue(1));
17984 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17985 }
17986
17987 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17988 return NewVSel;
17989
17990 return SDValue();
17991}
17992
17993SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17994 SDValue N0 = N->getOperand(0);
17995 EVT VT = N->getValueType(0);
17996
17997 // fold (fceil c1) -> fceil(c1)
17998 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17999 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
18000
18001 return SDValue();
18002}
18003
18004SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
18005 SDValue N0 = N->getOperand(0);
18006 EVT VT = N->getValueType(0);
18007
18008 // fold (ftrunc c1) -> ftrunc(c1)
18009 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18010 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
18011
18012 // fold ftrunc (known rounded int x) -> x
18013 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
18014 // likely to be generated to extract integer from a rounded floating value.
18015 switch (N0.getOpcode()) {
18016 default: break;
18017 case ISD::FRINT:
18018 case ISD::FTRUNC:
18019 case ISD::FNEARBYINT:
18020 case ISD::FROUNDEVEN:
18021 case ISD::FFLOOR:
18022 case ISD::FCEIL:
18023 return N0;
18024 }
18025
18026 return SDValue();
18027}
18028
18029SDValue DAGCombiner::visitFFREXP(SDNode *N) {
18030 SDValue N0 = N->getOperand(0);
18031
18032 // fold (ffrexp c1) -> ffrexp(c1)
18033 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18034 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
18035 return SDValue();
18036}
18037
18038SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
18039 SDValue N0 = N->getOperand(0);
18040 EVT VT = N->getValueType(0);
18041
18042 // fold (ffloor c1) -> ffloor(c1)
18043 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18044 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
18045
18046 return SDValue();
18047}
18048
18049SDValue DAGCombiner::visitFNEG(SDNode *N) {
18050 SDValue N0 = N->getOperand(0);
18051 EVT VT = N->getValueType(0);
18052 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18053
18054 // Constant fold FNEG.
18055 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18056 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
18057
18058 if (SDValue NegN0 =
18059 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
18060 return NegN0;
18061
18062 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
18063 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
18064 // know it was called from a context with a nsz flag if the input fsub does
18065 // not.
18066 if (N0.getOpcode() == ISD::FSUB &&
18067 (DAG.getTarget().Options.NoSignedZerosFPMath ||
18068 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
18069 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
18070 N0.getOperand(0));
18071 }
18072
18073 if (SDValue Cast = foldSignChangeInBitcast(N))
18074 return Cast;
18075
18076 return SDValue();
18077}
18078
18079SDValue DAGCombiner::visitFMinMax(SDNode *N) {
18080 SDValue N0 = N->getOperand(0);
18081 SDValue N1 = N->getOperand(1);
18082 EVT VT = N->getValueType(0);
18083 const SDNodeFlags Flags = N->getFlags();
18084 unsigned Opc = N->getOpcode();
18085 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
18086 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
18087 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
18088
18089 // Constant fold.
18090 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18091 return C;
18092
18093 // Canonicalize to constant on RHS.
18094 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18095 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18096 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18097
18098 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18099 const APFloat &AF = N1CFP->getValueAPF();
18100
18101 // minnum(X, nan) -> X
18102 // maxnum(X, nan) -> X
18103 // minimum(X, nan) -> nan
18104 // maximum(X, nan) -> nan
18105 if (AF.isNaN())
18106 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18107
18108 // In the following folds, inf can be replaced with the largest finite
18109 // float, if the ninf flag is set.
18110 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18111 // minnum(X, -inf) -> -inf
18112 // maxnum(X, +inf) -> +inf
18113 // minimum(X, -inf) -> -inf if nnan
18114 // maximum(X, +inf) -> +inf if nnan
18115 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18116 return N->getOperand(1);
18117
18118 // minnum(X, +inf) -> X if nnan
18119 // maxnum(X, -inf) -> X if nnan
18120 // minimum(X, +inf) -> X
18121 // maximum(X, -inf) -> X
18122 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18123 return N->getOperand(0);
18124 }
18125 }
18126
18127 if (SDValue SD = reassociateReduction(
18128 PropagatesNaN
18129 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18130 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18131 Opc, SDLoc(N), VT, N0, N1, Flags))
18132 return SD;
18133
18134 return SDValue();
18135}
18136
18137SDValue DAGCombiner::visitFABS(SDNode *N) {
18138 SDValue N0 = N->getOperand(0);
18139 EVT VT = N->getValueType(0);
18140
18141 // fold (fabs c1) -> fabs(c1)
18142 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18143 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
18144
18145 // fold (fabs (fabs x)) -> (fabs x)
18146 if (N0.getOpcode() == ISD::FABS)
18147 return N->getOperand(0);
18148
18149 // fold (fabs (fneg x)) -> (fabs x)
18150 // fold (fabs (fcopysign x, y)) -> (fabs x)
18151 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18152 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
18153
18154 if (SDValue Cast = foldSignChangeInBitcast(N))
18155 return Cast;
18156
18157 return SDValue();
18158}
18159
18160SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18161 SDValue Chain = N->getOperand(0);
18162 SDValue N1 = N->getOperand(1);
18163 SDValue N2 = N->getOperand(2);
18164
18165 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18166 // nondeterministic jumps).
18167 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18168 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18169 N1->getOperand(0), N2);
18170 }
18171
18172 // Variant of the previous fold where there is a SETCC in between:
18173 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18174 // =>
18175 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18176 // =>
18177 // BRCOND(SETCC(X, CONST, Cond))
18178 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18179 // isn't equivalent to true or false.
18180 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18181 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18182 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18183 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18184 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18185 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18186 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18187 bool Updated = false;
18188
18189 // Is 'X Cond C' always true or false?
18190 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18191 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18192 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18193 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18194 (Cond == ISD::SETGT && C->isMaxSignedValue());
18195 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18196 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18197 (Cond == ISD::SETUGE && C->isZero()) ||
18198 (Cond == ISD::SETGE && C->isMinSignedValue());
18199 return True || False;
18200 };
18201
18202 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18203 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18204 S0 = S0->getOperand(0);
18205 Updated = true;
18206 }
18207 }
18208 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18209 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18210 S1 = S1->getOperand(0);
18211 Updated = true;
18212 }
18213 }
18214
18215 if (Updated)
18216 return DAG.getNode(
18217 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18218 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
18219 }
18220
18221 // If N is a constant we could fold this into a fallthrough or unconditional
18222 // branch. However that doesn't happen very often in normal code, because
18223 // Instcombine/SimplifyCFG should have handled the available opportunities.
18224 // If we did this folding here, it would be necessary to update the
18225 // MachineBasicBlock CFG, which is awkward.
18226
18227 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18228 // on the target.
18229 if (N1.getOpcode() == ISD::SETCC &&
18230 TLI.isOperationLegalOrCustom(ISD::BR_CC,
18231 N1.getOperand(0).getValueType())) {
18232 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18233 Chain, N1.getOperand(2),
18234 N1.getOperand(0), N1.getOperand(1), N2);
18235 }
18236
18237 if (N1.hasOneUse()) {
18238 // rebuildSetCC calls visitXor which may change the Chain when there is a
18239 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18240 HandleSDNode ChainHandle(Chain);
18241 if (SDValue NewN1 = rebuildSetCC(N1))
18242 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18243 ChainHandle.getValue(), NewN1, N2);
18244 }
18245
18246 return SDValue();
18247}
18248
18249SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18250 if (N.getOpcode() == ISD::SRL ||
18251 (N.getOpcode() == ISD::TRUNCATE &&
18252 (N.getOperand(0).hasOneUse() &&
18253 N.getOperand(0).getOpcode() == ISD::SRL))) {
18254 // Look past the truncate.
18255 if (N.getOpcode() == ISD::TRUNCATE)
18256 N = N.getOperand(0);
18257
18258 // Match this pattern so that we can generate simpler code:
18259 //
18260 // %a = ...
18261 // %b = and i32 %a, 2
18262 // %c = srl i32 %b, 1
18263 // brcond i32 %c ...
18264 //
18265 // into
18266 //
18267 // %a = ...
18268 // %b = and i32 %a, 2
18269 // %c = setcc eq %b, 0
18270 // brcond %c ...
18271 //
18272 // This applies only when the AND constant value has one bit set and the
18273 // SRL constant is equal to the log2 of the AND constant. The back-end is
18274 // smart enough to convert the result into a TEST/JMP sequence.
18275 SDValue Op0 = N.getOperand(0);
18276 SDValue Op1 = N.getOperand(1);
18277
18278 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18279 SDValue AndOp1 = Op0.getOperand(1);
18280
18281 if (AndOp1.getOpcode() == ISD::Constant) {
18282 const APInt &AndConst = AndOp1->getAsAPIntVal();
18283
18284 if (AndConst.isPowerOf2() &&
18285 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18286 SDLoc DL(N);
18287 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18288 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18289 ISD::SETNE);
18290 }
18291 }
18292 }
18293 }
18294
18295 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18296 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18297 if (N.getOpcode() == ISD::XOR) {
18298 // Because we may call this on a speculatively constructed
18299 // SimplifiedSetCC Node, we need to simplify this node first.
18300 // Ideally this should be folded into SimplifySetCC and not
18301 // here. For now, grab a handle to N so we don't lose it from
18302 // replacements internal to the visit.
18303 HandleSDNode XORHandle(N);
18304 while (N.getOpcode() == ISD::XOR) {
18305 SDValue Tmp = visitXOR(N.getNode());
18306 // No simplification done.
18307 if (!Tmp.getNode())
18308 break;
18309 // Returning N is a form of in-visit replacement that may invalidate
18310 // N. Grab the value from the handle.
18311 if (Tmp.getNode() == N.getNode())
18312 N = XORHandle.getValue();
18313 else // Node simplified. Try simplifying again.
18314 N = Tmp;
18315 }
18316
18317 if (N.getOpcode() != ISD::XOR)
18318 return N;
18319
18320 SDValue Op0 = N->getOperand(0);
18321 SDValue Op1 = N->getOperand(1);
18322
18323 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18324 bool Equal = false;
18325 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18326 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18327 Op0.getValueType() == MVT::i1) {
18328 N = Op0;
18329 Op0 = N->getOperand(0);
18330 Op1 = N->getOperand(1);
18331 Equal = true;
18332 }
18333
18334 EVT SetCCVT = N.getValueType();
18335 if (LegalTypes)
18336 SetCCVT = getSetCCResultType(SetCCVT);
18337 // Replace the uses of XOR with SETCC
18338 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18339 Equal ? ISD::SETEQ : ISD::SETNE);
18340 }
18341 }
18342
18343 return SDValue();
18344}
18345
18346// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18347//
18348SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18349 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18350 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18351
18352 // If N is a constant we could fold this into a fallthrough or unconditional
18353 // branch. However that doesn't happen very often in normal code, because
18354 // Instcombine/SimplifyCFG should have handled the available opportunities.
18355 // If we did this folding here, it would be necessary to update the
18356 // MachineBasicBlock CFG, which is awkward.
18357
18358 // Use SimplifySetCC to simplify SETCC's.
18359 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18360 CondLHS, CondRHS, CC->get(), SDLoc(N),
18361 false);
18362 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18363
18364 // fold to a simpler setcc
18365 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18366 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18367 N->getOperand(0), Simp.getOperand(2),
18368 Simp.getOperand(0), Simp.getOperand(1),
18369 N->getOperand(4));
18370
18371 return SDValue();
18372}
18373
18374static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18375 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18376 const TargetLowering &TLI) {
18377 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18378 if (LD->isIndexed())
18379 return false;
18380 EVT VT = LD->getMemoryVT();
18381 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18382 return false;
18383 Ptr = LD->getBasePtr();
18384 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18385 if (ST->isIndexed())
18386 return false;
18387 EVT VT = ST->getMemoryVT();
18388 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18389 return false;
18390 Ptr = ST->getBasePtr();
18391 IsLoad = false;
18392 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18393 if (LD->isIndexed())
18394 return false;
18395 EVT VT = LD->getMemoryVT();
18396 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18397 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18398 return false;
18399 Ptr = LD->getBasePtr();
18400 IsMasked = true;
18401 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18402 if (ST->isIndexed())
18403 return false;
18404 EVT VT = ST->getMemoryVT();
18405 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18406 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18407 return false;
18408 Ptr = ST->getBasePtr();
18409 IsLoad = false;
18410 IsMasked = true;
18411 } else {
18412 return false;
18413 }
18414 return true;
18415}
18416
18417/// Try turning a load/store into a pre-indexed load/store when the base
18418/// pointer is an add or subtract and it has other uses besides the load/store.
18419/// After the transformation, the new indexed load/store has effectively folded
18420/// the add/subtract in and all of its other uses are redirected to the
18421/// new load/store.
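/// For example (an illustrative sketch, not tied to any particular target):
/// if (add x, 4) feeds a load and is also used elsewhere, a target with
/// pre-increment addressing can load from x + 4 and produce the updated
/// pointer in one operation; the other uses of (add x, 4) are then redirected
/// to that pointer result.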
18422bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18423 if (Level < AfterLegalizeDAG)
18424 return false;
18425
18426 bool IsLoad = true;
18427 bool IsMasked = false;
18428 SDValue Ptr;
18429 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18430 Ptr, TLI))
18431 return false;
18432
18433 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18434 // out. There is no reason to make this a preinc/predec.
18435 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18436 Ptr->hasOneUse())
18437 return false;
18438
18439 // Ask the target to do addressing mode selection.
18440 SDValue BasePtr;
18441 SDValue Offset;
18442 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18443 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18444 return false;
18445
18446 // Backends without true r+i pre-indexed forms may need to pass a
18447 // constant base with a variable offset so that constant coercion
18448 // will work with the patterns in canonical form.
18449 bool Swapped = false;
18450 if (isa<ConstantSDNode>(BasePtr)) {
18451 std::swap(BasePtr, Offset);
18452 Swapped = true;
18453 }
18454
18455 // Don't create an indexed load / store with zero offset.
18456 if (isNullConstant(Offset))
18457 return false;
18458
18459 // Try turning it into a pre-indexed load / store except when:
18460 // 1) The new base ptr is a frame index.
18461 // 2) If N is a store and the new base ptr is either the same as or is a
18462 // predecessor of the value being stored.
18463 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18464 // that would create a cycle.
18465 // 4) All uses are load / store ops that use it as old base ptr.
18466
18467 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18468 // (plus the implicit offset) to a register to preinc anyway.
18469 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18470 return false;
18471
18472 // Check #2.
18473 if (!IsLoad) {
18474 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18475 : cast<StoreSDNode>(N)->getValue();
18476
18477 // Would require a copy.
18478 if (Val == BasePtr)
18479 return false;
18480
18481 // Would create a cycle.
18482 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18483 return false;
18484 }
18485
18486 // Caches for hasPredecessorHelper.
18487 SmallPtrSet<const SDNode *, 32> Visited;
18488 SmallVector<const SDNode *, 16> Worklist;
18489 Worklist.push_back(N);
18490
18491 // If the offset is a constant, there may be other adds of constants that
18492 // can be folded with this one. We should do this to avoid having to keep
18493 // a copy of the original base pointer.
18494 SmallVector<SDNode *, 16> OtherUses;
18495 constexpr unsigned int MaxSteps = 8192;
18496 if (isa<ConstantSDNode>(Offset))
18497 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18498 UE = BasePtr->use_end();
18499 UI != UE; ++UI) {
18500 SDUse &Use = UI.getUse();
18501 // Skip the use that is Ptr and uses of other results from BasePtr's
18502 // node (important for nodes that return multiple results).
18503 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18504 continue;
18505
18506 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18507 MaxSteps))
18508 continue;
18509
18510 if (Use.getUser()->getOpcode() != ISD::ADD &&
18511 Use.getUser()->getOpcode() != ISD::SUB) {
18512 OtherUses.clear();
18513 break;
18514 }
18515
18516 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18517 if (!isa<ConstantSDNode>(Op1)) {
18518 OtherUses.clear();
18519 break;
18520 }
18521
18522 // FIXME: In some cases, we can be smarter about this.
18523 if (Op1.getValueType() != Offset.getValueType()) {
18524 OtherUses.clear();
18525 break;
18526 }
18527
18528 OtherUses.push_back(Use.getUser());
18529 }
18530
18531 if (Swapped)
18532 std::swap(BasePtr, Offset);
18533
18534 // Now check for #3 and #4.
18535 bool RealUse = false;
18536
18537 for (SDNode *Use : Ptr->uses()) {
18538 if (Use == N)
18539 continue;
18540 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18541 return false;
18542
18543 // If Ptr may be folded in addressing mode of other use, then it's
18544 // not profitable to do this transformation.
18545 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18546 RealUse = true;
18547 }
18548
18549 if (!RealUse)
18550 return false;
18551
18552 SDValue Result;
18553 if (!IsMasked) {
18554 if (IsLoad)
18555 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18556 else
18557 Result =
18558 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18559 } else {
18560 if (IsLoad)
18561 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18562 Offset, AM);
18563 else
18564 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18565 Offset, AM);
18566 }
18567 ++PreIndexedNodes;
18568 ++NodesCombined;
18569 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18570 Result.dump(&DAG); dbgs() << '\n');
18571 WorklistRemover DeadNodes(*this);
18572 if (IsLoad) {
18573 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18574 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18575 } else {
18576 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18577 }
18578
18579 // Finally, since the node is now dead, remove it from the graph.
18580 deleteAndRecombine(N);
18581
18582 if (Swapped)
18583 std::swap(BasePtr, Offset);
18584
18585 // Replace other uses of BasePtr that can be updated to use Ptr
18586 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18587 unsigned OffsetIdx = 1;
18588 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18589 OffsetIdx = 0;
18590 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18591 BasePtr.getNode() && "Expected BasePtr operand");
18592
18593 // We need to replace ptr0 in the following expression:
18594 // x0 * offset0 + y0 * ptr0 = t0
18595 // knowing that
18596 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18597 //
18598 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18599 // indexed load/store and the expression that needs to be re-written.
18600 //
18601 // Therefore, we have:
18602 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
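// Worked example (illustrative only): if the other use computes
// t0 = ptr0 + offset0 and the new node is a pre-decrement
// (t1 = ptr0 - offset1), then x0 = y0 = y1 = 1 and x1 = -1, so
// t0 = (offset0 + offset1) + t1, i.e. an ADD of t1 and the constant
// offset0 + offset1, which is what the code below constructs.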
18603
18604 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18605 const APInt &Offset0 = CN->getAPIntValue();
18606 const APInt &Offset1 = Offset->getAsAPIntVal();
18607 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18608 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18609 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18610 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18611
18612 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18613
18614 APInt CNV = Offset0;
18615 if (X0 < 0) CNV = -CNV;
18616 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18617 else CNV = CNV - Offset1;
18618
18619 SDLoc DL(OtherUses[i]);
18620
18621 // We can now generate the new expression.
18622 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18623 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18624
18625 SDValue NewUse = DAG.getNode(Opcode,
18626 DL,
18627 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18628 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18629 deleteAndRecombine(OtherUses[i]);
18630 }
18631
18632 // Replace the uses of Ptr with uses of the updated base value.
18633 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18634 deleteAndRecombine(Ptr.getNode());
18635 AddToWorklist(Result.getNode());
18636
18637 return true;
18638}
18639
18640 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18641 SDValue &BasePtr, SDValue &Offset,
18642 ISD::MemIndexedMode &AM,
18643 SelectionDAG &DAG,
18644 const TargetLowering &TLI) {
18645 if (PtrUse == N ||
18646 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18647 return false;
18648
18649 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18650 return false;
18651
18652 // Don't create an indexed load / store with zero offset.
18653 if (isNullConstant(Offset))
18654 return false;
18655
18656 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18657 return false;
18658
18659 SmallPtrSet<const SDNode *, 32> Visited;
18660 for (SDNode *Use : BasePtr->uses()) {
18661 if (Use == Ptr.getNode())
18662 continue;
18663
18664 // Don't do the transformation if there's a later user which could perform the indexing instead.
18665 if (isa<MemSDNode>(Use)) {
18666 bool IsLoad = true;
18667 bool IsMasked = false;
18668 SDValue OtherPtr;
18669 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18670 IsMasked, OtherPtr, TLI)) {
18671 SmallVector<const SDNode *, 2> Worklist;
18672 Worklist.push_back(Use);
18673 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18674 return false;
18675 }
18676 }
18677
18678 // If all the uses are load / store addresses, then don't do the
18679 // transformation.
18680 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18681 for (SDNode *UseUse : Use->uses())
18682 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18683 return false;
18684 }
18685 }
18686 return true;
18687}
18688
18689 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18690 bool &IsMasked, SDValue &Ptr,
18691 SDValue &BasePtr, SDValue &Offset,
18692 ISD::MemIndexedMode &AM,
18693 SelectionDAG &DAG,
18694 const TargetLowering &TLI) {
18695 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18696 IsMasked, Ptr, TLI) ||
18697 Ptr->hasOneUse())
18698 return nullptr;
18699
18700 // Try turning it into a post-indexed load / store except when
18701 // 1) All uses are load / store ops that use it as base ptr (and
18702 // it may be folded into the addressing mode).
18703 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18704 // nor a successor of N. Otherwise, if Op is folded that would
18705 // create a cycle.
18706 for (SDNode *Op : Ptr->uses()) {
18707 // Check for #1.
18708 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18709 continue;
18710
18711 // Check for #2.
18712 SmallPtrSet<const SDNode *, 32> Visited;
18713 SmallVector<const SDNode *, 8> Worklist;
18714 constexpr unsigned int MaxSteps = 8192;
18715 // Ptr is predecessor to both N and Op.
18716 Visited.insert(Ptr.getNode());
18717 Worklist.push_back(N);
18718 Worklist.push_back(Op);
18719 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18720 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18721 return Op;
18722 }
18723 return nullptr;
18724}
18725
18726 /// Try to combine a load/store with an add/sub of the base pointer node into a
18727 /// post-indexed load/store. The transformation effectively folds the add/subtract
18728 /// into the new indexed load/store, and all of its other uses are redirected to
18729 /// the new load/store.
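/// For example (illustrative): a load from x followed by (add x, 4) used as
/// the next pointer can become a post-incremented load that yields both the
/// loaded value and x + 4 in a single operation.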
18730bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18731 if (Level < AfterLegalizeDAG)
18732 return false;
18733
18734 bool IsLoad = true;
18735 bool IsMasked = false;
18736 SDValue Ptr;
18737 SDValue BasePtr;
18738 SDValue Offset;
18739 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18740 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18741 Offset, AM, DAG, TLI);
18742 if (!Op)
18743 return false;
18744
18745 SDValue Result;
18746 if (!IsMasked)
18747 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18748 Offset, AM)
18749 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18750 BasePtr, Offset, AM);
18751 else
18752 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18753 BasePtr, Offset, AM)
18754 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18755 BasePtr, Offset, AM);
18756 ++PostIndexedNodes;
18757 ++NodesCombined;
18758 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18759 Result.dump(&DAG); dbgs() << '\n');
18760 WorklistRemover DeadNodes(*this);
18761 if (IsLoad) {
18762 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18763 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18764 } else {
18765 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18766 }
18767
18768 // Finally, since the node is now dead, remove it from the graph.
18769 deleteAndRecombine(N);
18770
18771 // Replace the uses of Op with uses of the updated base value.
18772 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18773 Result.getValue(IsLoad ? 1 : 0));
18774 deleteAndRecombine(Op);
18775 return true;
18776}
18777
18778/// Return the base-pointer arithmetic from an indexed \p LD.
18779SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18780 ISD::MemIndexedMode AM = LD->getAddressingMode();
18781 assert(AM != ISD::UNINDEXED);
18782 SDValue BP = LD->getOperand(1);
18783 SDValue Inc = LD->getOperand(2);
18784
18785 // Some backends use TargetConstants for load offsets, but don't expect
18786 // TargetConstants in general ADD nodes. We can convert these constants into
18787 // regular Constants (if the constant is not opaque).
18788 assert((Inc.getOpcode() != ISD::TargetConstant ||
18789 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18790 "Cannot split out indexing using opaque target constants");
18791 if (Inc.getOpcode() == ISD::TargetConstant) {
18792 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18793 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18794 ConstInc->getValueType(0));
18795 }
18796
18797 unsigned Opc =
18798 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18799 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18800}
18801
18802 static ElementCount numVectorEltsOrZero(EVT T) {
18803 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18804}
18805
18806bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18807 EVT STType = Val.getValueType();
18808 EVT STMemType = ST->getMemoryVT();
18809 if (STType == STMemType)
18810 return true;
18811 if (isTypeLegal(STMemType))
18812 return false; // fail.
18813 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18814 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18815 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18816 return true;
18817 }
18818 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18819 STType.isInteger() && STMemType.isInteger()) {
18820 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18821 return true;
18822 }
18823 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18824 Val = DAG.getBitcast(STMemType, Val);
18825 return true;
18826 }
18827 return false; // fail.
18828}
18829
18830bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18831 EVT LDMemType = LD->getMemoryVT();
18832 EVT LDType = LD->getValueType(0);
18833 assert(Val.getValueType() == LDMemType &&
18834 "Attempting to extend value of non-matching type");
18835 if (LDType == LDMemType)
18836 return true;
18837 if (LDMemType.isInteger() && LDType.isInteger()) {
18838 switch (LD->getExtensionType()) {
18839 case ISD::NON_EXTLOAD:
18840 Val = DAG.getBitcast(LDType, Val);
18841 return true;
18842 case ISD::EXTLOAD:
18843 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18844 return true;
18845 case ISD::SEXTLOAD:
18846 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18847 return true;
18848 case ISD::ZEXTLOAD:
18849 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18850 return true;
18851 }
18852 }
18853 return false;
18854}
18855
18856StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18857 int64_t &Offset) {
18858 SDValue Chain = LD->getOperand(0);
18859
18860 // Look through CALLSEQ_START.
18861 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18862 Chain = Chain->getOperand(0);
18863
18864 StoreSDNode *ST = nullptr;
18865 SmallVector<SDValue, 8> Aliases;
18866 if (Chain.getOpcode() == ISD::TokenFactor) {
18867 // Look for unique store within the TokenFactor.
18868 for (SDValue Op : Chain->ops()) {
18869 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18870 if (!Store)
18871 continue;
18872 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18873 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18874 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18875 continue;
18876 // Make sure the store is not aliased with any nodes in TokenFactor.
18877 GatherAllAliases(Store, Chain, Aliases);
18878 if (Aliases.empty() ||
18879 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18880 ST = Store;
18881 break;
18882 }
18883 } else {
18884 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18885 if (Store) {
18886 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18887 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18888 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18889 ST = Store;
18890 }
18891 }
18892
18893 return ST;
18894}
18895
18896SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18897 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18898 return SDValue();
18899 SDValue Chain = LD->getOperand(0);
18900 int64_t Offset;
18901
18902 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18903 // TODO: Relax this restriction for unordered atomics (see D66309)
18904 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18905 return SDValue();
18906
18907 EVT LDType = LD->getValueType(0);
18908 EVT LDMemType = LD->getMemoryVT();
18909 EVT STMemType = ST->getMemoryVT();
18910 EVT STType = ST->getValue().getValueType();
18911
18912 // There are two cases to consider here:
18913 // 1. The store is fixed width and the load is scalable. In this case we
18914 // don't know at compile time if the store completely envelops the load
18915 // so we abandon the optimisation.
18916 // 2. The store is scalable and the load is fixed width. We could
18917 // potentially support a limited number of cases here, but there has been
18918 // no cost-benefit analysis to prove it's worth it.
18919 bool LdStScalable = LDMemType.isScalableVT();
18920 if (LdStScalable != STMemType.isScalableVT())
18921 return SDValue();
18922
18923 // If we are dealing with scalable vectors on a big endian platform the
18924 // calculation of offsets below becomes trickier, since we do not know at
18925 // compile time the absolute size of the vector. Until we've done more
18926 // analysis on big-endian platforms it seems better to bail out for now.
18927 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18928 return SDValue();
18929
18930 // Normalize for Endianness. After this Offset=0 will denote that the least
18931 // significant bit in the loaded value maps to the least significant bit in
18932 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18933 // n:th least significant byte of the stored value.
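// For example, an i32 load at the same address as an i64 store has a raw
// Offset of 0; on a big-endian target those are the most significant bytes,
// so the normalized Offset becomes (8 - 4) - 0 = 4.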
18934 int64_t OrigOffset = Offset;
18935 if (DAG.getDataLayout().isBigEndian())
18936 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18937 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18938 8 -
18939 Offset;
18940
18941 // Check that the stored value covers all bits that are loaded.
18942 bool STCoversLD;
18943
18944 TypeSize LdMemSize = LDMemType.getSizeInBits();
18945 TypeSize StMemSize = STMemType.getSizeInBits();
18946 if (LdStScalable)
18947 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18948 else
18949 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18950 StMemSize.getFixedValue());
18951
18952 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18953 if (LD->isIndexed()) {
18954 // Cannot handle opaque target constants and we must respect the user's
18955 // request not to split indexes from loads.
18956 if (!canSplitIdx(LD))
18957 return SDValue();
18958 SDValue Idx = SplitIndexingFromLoad(LD);
18959 SDValue Ops[] = {Val, Idx, Chain};
18960 return CombineTo(LD, Ops, 3);
18961 }
18962 return CombineTo(LD, Val, Chain);
18963 };
18964
18965 if (!STCoversLD)
18966 return SDValue();
18967
18968 // Memory as copy space (potentially masked).
18969 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18970 // Simple case: Direct non-truncating forwarding
18971 if (LDType.getSizeInBits() == LdMemSize)
18972 return ReplaceLd(LD, ST->getValue(), Chain);
18973 // Can we model the truncate and extension with an and mask?
18974 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18975 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18976 // Mask to size of LDMemType
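// (For instance, an i16 truncating store of an i32 value feeding an i16
// zero- or any-extending load back to i32 is forwarded as
// (and StoredVal, 0xFFFF).)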
18977 auto Mask =
18978 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18979 StMemSize.getFixedValue()),
18980 SDLoc(ST), STType);
18981 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18982 return ReplaceLd(LD, Val, Chain);
18983 }
18984 }
18985
18986 // Handle some cases for big-endian that would be Offset 0 and handled for
18987 // little-endian.
18988 SDValue Val = ST->getValue();
18989 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18990 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18991 !LDType.isVector() && isTypeLegal(STType) &&
18992 TLI.isOperationLegal(ISD::SRL, STType)) {
18993 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18994 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18995 Offset = 0;
18996 }
18997 }
18998
18999 // TODO: Deal with nonzero offset.
19000 if (LD->getBasePtr().isUndef() || Offset != 0)
19001 return SDValue();
19002 // Model necessary truncations / extensions.
19003 // Truncate Value To Stored Memory Size.
19004 do {
19005 if (!getTruncatedStoreValue(ST, Val))
19006 break;
19007 if (!isTypeLegal(LDMemType))
19008 break;
19009 if (STMemType != LDMemType) {
19010 // TODO: Support vectors? This requires extract_subvector/bitcast.
19011 if (!STMemType.isVector() && !LDMemType.isVector() &&
19012 STMemType.isInteger() && LDMemType.isInteger())
19013 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
19014 else
19015 break;
19016 }
19017 if (!extendLoadedValueToExtension(LD, Val))
19018 break;
19019 return ReplaceLd(LD, Val, Chain);
19020 } while (false);
19021
19022 // On failure, cleanup dead nodes we may have created.
19023 if (Val->use_empty())
19024 deleteAndRecombine(Val.getNode());
19025 return SDValue();
19026}
19027
19028SDValue DAGCombiner::visitLOAD(SDNode *N) {
19029 LoadSDNode *LD = cast<LoadSDNode>(N);
19030 SDValue Chain = LD->getChain();
19031 SDValue Ptr = LD->getBasePtr();
19032
19033 // If load is not volatile and there are no uses of the loaded value (and
19034 // the updated indexed value in case of indexed loads), change uses of the
19035 // chain value into uses of the chain input (i.e. delete the dead load).
19036 // TODO: Allow this for unordered atomics (see D66309)
19037 if (LD->isSimple()) {
19038 if (N->getValueType(1) == MVT::Other) {
19039 // Unindexed loads.
19040 if (!N->hasAnyUseOfValue(0)) {
19041 // It's not safe to use the two value CombineTo variant here. e.g.
19042 // v1, chain2 = load chain1, loc
19043 // v2, chain3 = load chain2, loc
19044 // v3 = add v2, c
19045 // Now we replace use of chain2 with chain1. This makes the second load
19046 // isomorphic to the one we are deleting, and thus makes this load live.
19047 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
19048 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
19049 dbgs() << "\n");
19050 WorklistRemover DeadNodes(*this);
19051 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19052 AddUsersToWorklist(Chain.getNode());
19053 if (N->use_empty())
19054 deleteAndRecombine(N);
19055
19056 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19057 }
19058 } else {
19059 // Indexed loads.
19060 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
19061
19062 // If this load has an opaque TargetConstant offset, then we cannot split
19063 // the indexing into an add/sub directly (that TargetConstant may not be
19064 // valid for a different type of node, and we cannot convert an opaque
19065 // target constant into a regular constant).
19066 bool CanSplitIdx = canSplitIdx(LD);
19067
19068 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
19069 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
19070 SDValue Index;
19071 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
19072 Index = SplitIndexingFromLoad(LD);
19073 // Try to fold the base pointer arithmetic into subsequent loads and
19074 // stores.
19075 AddUsersToWorklist(N);
19076 } else
19077 Index = DAG.getUNDEF(N->getValueType(1));
19078 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
19079 dbgs() << "\nWith: "; Undef.dump(&DAG);
19080 dbgs() << " and 2 other values\n");
19081 WorklistRemover DeadNodes(*this);
19082 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
19083 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
19084 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
19085 deleteAndRecombine(N);
19086 return SDValue(N, 0); // Return N so it doesn't get rechecked!
19087 }
19088 }
19089 }
19090
19091 // If this load is directly stored, replace the load value with the stored
19092 // value.
19093 if (auto V = ForwardStoreValueToDirectLoad(LD))
19094 return V;
19095
19096 // Try to infer better alignment information than the load already has.
19097 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19098 !LD->isAtomic()) {
19099 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19100 if (*Alignment > LD->getAlign() &&
19101 isAligned(*Alignment, LD->getSrcValueOffset())) {
19102 SDValue NewLoad = DAG.getExtLoad(
19103 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19104 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19105 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19106 // NewLoad will always be N as we are only refining the alignment
19107 assert(NewLoad.getNode() == N);
19108 (void)NewLoad;
19109 }
19110 }
19111 }
19112
19113 if (LD->isUnindexed()) {
19114 // Walk up chain skipping non-aliasing memory nodes.
19115 SDValue BetterChain = FindBetterChain(LD, Chain);
19116
19117 // If there is a better chain.
19118 if (Chain != BetterChain) {
19119 SDValue ReplLoad;
19120
19121 // Replace the chain to avoid a dependency.
19122 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19123 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19124 BetterChain, Ptr, LD->getMemOperand());
19125 } else {
19126 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19127 LD->getValueType(0),
19128 BetterChain, Ptr, LD->getMemoryVT(),
19129 LD->getMemOperand());
19130 }
19131
19132 // Create token factor to keep old chain connected.
19133 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19134 MVT::Other, Chain, ReplLoad.getValue(1));
19135
19136 // Replace uses with load result and token factor
19137 return CombineTo(N, ReplLoad.getValue(0), Token);
19138 }
19139 }
19140
19141 // Try transforming N to an indexed load.
19142 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19143 return SDValue(N, 0);
19144
19145 // Try to slice up N to more direct loads if the slices are mapped to
19146 // different register banks or pairing can take place.
19147 if (SliceUpLoad(N))
19148 return SDValue(N, 0);
19149
19150 return SDValue();
19151}
19152
19153namespace {
19154
19155/// Helper structure used to slice a load in smaller loads.
19156/// Basically a slice is obtained from the following sequence:
19157/// Origin = load Ty1, Base
19158/// Shift = srl Ty1 Origin, CstTy Amount
19159/// Inst = trunc Shift to Ty2
19160///
19161/// Then, it will be rewritten into:
19162/// Slice = load SliceTy, Base + SliceOffset
19163/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19164///
19165/// SliceTy is deduced from the number of bits that are actually used to
19166/// build Inst.
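/// For example, with Origin = load i32, Shift = 16 and Inst = trunc to i16,
/// the slice becomes an i16 load from Base + 2 on a little-endian target
/// (Base + 0 on a big-endian one).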
19167struct LoadedSlice {
19168 /// Helper structure used to compute the cost of a slice.
19169 struct Cost {
19170 /// Are we optimizing for code size.
19171 bool ForCodeSize = false;
19172
19173 /// Various costs.
19174 unsigned Loads = 0;
19175 unsigned Truncates = 0;
19176 unsigned CrossRegisterBanksCopies = 0;
19177 unsigned ZExts = 0;
19178 unsigned Shift = 0;
19179
19180 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19181
19182 /// Get the cost of one isolated slice.
19183 Cost(const LoadedSlice &LS, bool ForCodeSize)
19184 : ForCodeSize(ForCodeSize), Loads(1) {
19185 EVT TruncType = LS.Inst->getValueType(0);
19186 EVT LoadedType = LS.getLoadedType();
19187 if (TruncType != LoadedType &&
19188 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19189 ZExts = 1;
19190 }
19191
19192 /// Account for slicing gain in the current cost.
19193 /// Slicing provides a few gains, such as removing a shift or a
19194 /// truncate. This method grows the cost of the original
19195 /// load by the gain from this slice.
19196 void addSliceGain(const LoadedSlice &LS) {
19197 // Each slice saves a truncate.
19198 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19199 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19200 ++Truncates;
19201 // If there is a shift amount, this slice gets rid of it.
19202 if (LS.Shift)
19203 ++Shift;
19204 // If this slice can merge a cross register bank copy, account for it.
19205 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19206 ++CrossRegisterBanksCopies;
19207 }
19208
19209 Cost &operator+=(const Cost &RHS) {
19210 Loads += RHS.Loads;
19211 Truncates += RHS.Truncates;
19212 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19213 ZExts += RHS.ZExts;
19214 Shift += RHS.Shift;
19215 return *this;
19216 }
19217
19218 bool operator==(const Cost &RHS) const {
19219 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19220 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19221 ZExts == RHS.ZExts && Shift == RHS.Shift;
19222 }
19223
19224 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19225
19226 bool operator<(const Cost &RHS) const {
19227 // Assume cross register banks copies are as expensive as loads.
19228 // FIXME: Do we want some more target hooks?
19229 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19230 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19231 // Unless we are optimizing for code size, consider the
19232 // expensive operation first.
19233 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19234 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19235 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19236 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19237 }
19238
19239 bool operator>(const Cost &RHS) const { return RHS < *this; }
19240
19241 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19242
19243 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19244 };
19245
19246 // The last instruction that represents the slice. This should be a
19247 // truncate instruction.
19248 SDNode *Inst;
19249
19250 // The original load instruction.
19251 LoadSDNode *Origin;
19252
19253 // The right shift amount in bits from the original load.
19254 unsigned Shift;
19255
19256 // The DAG from which Origin comes.
19257 // This is used to get some contextual information about legal types, etc.
19258 SelectionDAG *DAG;
19259
19260 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19261 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19262 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19263
19264 /// Get the bits used in a chunk of bits \p BitWidth large.
19265 /// \return Result is \p BitWidth wide, with used bits set to 1 and
19266 /// unused bits set to 0.
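/// For example, an i8 truncate of an i32 origin with Shift = 16 yields
/// 0x00FF0000.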
19267 APInt getUsedBits() const {
19268 // Reproduce the trunc(lshr) sequence:
19269 // - Start from the truncated value.
19270 // - Zero extend to the desired bit width.
19271 // - Shift left.
19272 assert(Origin && "No original load to compare against.");
19273 unsigned BitWidth = Origin->getValueSizeInBits(0);
19274 assert(Inst && "This slice is not bound to an instruction");
19275 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19276 "Extracted slice is bigger than the whole type!");
19277 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19278 UsedBits.setAllBits();
19279 UsedBits = UsedBits.zext(BitWidth);
19280 UsedBits <<= Shift;
19281 return UsedBits;
19282 }
19283
19284 /// Get the size of the slice to be loaded in bytes.
19285 unsigned getLoadedSize() const {
19286 unsigned SliceSize = getUsedBits().popcount();
19287 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19288 return SliceSize / 8;
19289 }
19290
19291 /// Get the type that will be loaded for this slice.
19292 /// Note: This may not be the final type for the slice.
19293 EVT getLoadedType() const {
19294 assert(DAG && "Missing context");
19295 LLVMContext &Ctxt = *DAG->getContext();
19296 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19297 }
19298
19299 /// Get the alignment of the load used for this slice.
19300 Align getAlign() const {
19301 Align Alignment = Origin->getAlign();
19302 uint64_t Offset = getOffsetFromBase();
19303 if (Offset != 0)
19304 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19305 return Alignment;
19306 }
19307
19308 /// Check if this slice can be rewritten with legal operations.
19309 bool isLegal() const {
19310 // An invalid slice is not legal.
19311 if (!Origin || !Inst || !DAG)
19312 return false;
19313
19314 // Offsets are for indexed load only, we do not handle that.
19315 if (!Origin->getOffset().isUndef())
19316 return false;
19317
19318 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19319
19320 // Check that the type is legal.
19321 EVT SliceType = getLoadedType();
19322 if (!TLI.isTypeLegal(SliceType))
19323 return false;
19324
19325 // Check that the load is legal for this type.
19326 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19327 return false;
19328
19329 // Check that the offset can be computed.
19330 // 1. Check its type.
19331 EVT PtrType = Origin->getBasePtr().getValueType();
19332 if (PtrType == MVT::Untyped || PtrType.isExtended())
19333 return false;
19334
19335 // 2. Check that it fits in the immediate.
19336 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19337 return false;
19338
19339 // 3. Check that the computation is legal.
19340 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19341 return false;
19342
19343 // Check that the zext is legal if it needs one.
19344 EVT TruncateType = Inst->getValueType(0);
19345 if (TruncateType != SliceType &&
19346 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19347 return false;
19348
19349 return true;
19350 }
19351
19352 /// Get the offset in bytes of this slice in the original chunk of
19353 /// bits.
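/// For example, a one-byte slice of an i32 origin with Shift = 8 is at
/// offset 1 on a little-endian target and at offset 4 - 1 - 1 = 2 on a
/// big-endian one.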
19354 /// \pre DAG != nullptr.
19355 uint64_t getOffsetFromBase() const {
19356 assert(DAG && "Missing context.");
19357 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19358 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19359 uint64_t Offset = Shift / 8;
19360 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19361 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19362 "The size of the original loaded type is not a multiple of a"
19363 " byte.");
19364 // If Offset is bigger than TySizeInBytes, it means we are loading all
19365 // zeros. This should have been optimized before in the process.
19366 assert(TySizeInBytes > Offset &&
19367 "Invalid shift amount for given loaded size");
19368 if (IsBigEndian)
19369 Offset = TySizeInBytes - Offset - getLoadedSize();
19370 return Offset;
19371 }
19372
19373 /// Generate the sequence of instructions to load the slice
19374 /// represented by this object and redirect the uses of this slice to
19375 /// this new sequence of instructions.
19376 /// \pre this->Inst && this->Origin are valid Instructions and this
19377 /// object passed the legal check: LoadedSlice::isLegal returned true.
19378 /// \return The last instruction of the sequence used to load the slice.
19379 SDValue loadSlice() const {
19380 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19381 const SDValue &OldBaseAddr = Origin->getBasePtr();
19382 SDValue BaseAddr = OldBaseAddr;
19383 // Get the offset in that chunk of bytes w.r.t. the endianness.
19384 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19385 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19386 if (Offset) {
19387 // BaseAddr = BaseAddr + Offset.
19388 EVT ArithType = BaseAddr.getValueType();
19389 SDLoc DL(Origin);
19390 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19391 DAG->getConstant(Offset, DL, ArithType));
19392 }
19393
19394 // Create the type of the loaded slice according to its size.
19395 EVT SliceType = getLoadedType();
19396
19397 // Create the load for the slice.
19398 SDValue LastInst =
19399 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19400 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19401 Origin->getMemOperand()->getFlags());
19402 // If the final type is not the same as the loaded type, this means that
19403 // we have to pad with zero. Create a zero extend for that.
19404 EVT FinalType = Inst->getValueType(0);
19405 if (SliceType != FinalType)
19406 LastInst =
19407 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19408 return LastInst;
19409 }
19410
19411 /// Check if this slice can be merged with an expensive cross register
19412 /// bank copy. E.g.,
19413 /// i = load i32
19414 /// f = bitcast i32 i to float
19415 bool canMergeExpensiveCrossRegisterBankCopy() const {
19416 if (!Inst || !Inst->hasOneUse())
19417 return false;
19418 SDNode *Use = *Inst->use_begin();
19419 if (Use->getOpcode() != ISD::BITCAST)
19420 return false;
19421 assert(DAG && "Missing context");
19422 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19423 EVT ResVT = Use->getValueType(0);
19424 const TargetRegisterClass *ResRC =
19425 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19426 const TargetRegisterClass *ArgRC =
19427 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19428 Use->getOperand(0)->isDivergent());
19429 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19430 return false;
19431
19432 // At this point, we know that we perform a cross-register-bank copy.
19433 // Check if it is expensive.
19434 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19435 // Assume bitcasts are cheap, unless both register classes do not
19436 // explicitly share a common sub class.
19437 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19438 return false;
19439
19440 // Check if it will be merged with the load.
19441 // 1. Check the alignment / fast memory access constraint.
19442 unsigned IsFast = 0;
19443 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19444 Origin->getAddressSpace(), getAlign(),
19445 Origin->getMemOperand()->getFlags(), &IsFast) ||
19446 !IsFast)
19447 return false;
19448
19449 // 2. Check that the load is a legal operation for that type.
19450 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19451 return false;
19452
19453 // 3. Check that we do not have a zext in the way.
19454 if (Inst->getValueType(0) != getLoadedType())
19455 return false;
19456
19457 return true;
19458 }
19459};
19460
19461} // end anonymous namespace
19462
19463/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19464/// \p UsedBits looks like 0..0 1..1 0..0.
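/// For example, 0x00FF0000 is dense while 0x00FF00FF is not.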
19465static bool areUsedBitsDense(const APInt &UsedBits) {
19466 // If all the bits are one, this is dense!
19467 if (UsedBits.isAllOnes())
19468 return true;
19469
19470 // Get rid of the unused bits on the right.
19471 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19472 // Get rid of the unused bits on the left.
19473 if (NarrowedUsedBits.countl_zero())
19474 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19475 // Check that the chunk of bits is completely used.
19476 return NarrowedUsedBits.isAllOnes();
19477}
19478
19479/// Check whether or not \p First and \p Second are next to each other
19480/// in memory. This means that there is no hole between the bits loaded
19481/// by \p First and the bits loaded by \p Second.
19482static bool areSlicesNextToEachOther(const LoadedSlice &First,
19483 const LoadedSlice &Second) {
19484 assert(First.Origin == Second.Origin && First.Origin &&
19485 "Unable to match different memory origins.");
19486 APInt UsedBits = First.getUsedBits();
19487 assert((UsedBits & Second.getUsedBits()) == 0 &&
19488 "Slices are not supposed to overlap.");
19489 UsedBits |= Second.getUsedBits();
19490 return areUsedBitsDense(UsedBits);
19491}
19492
19493/// Adjust the \p GlobalLSCost according to the target
19494 /// pairing capabilities and the layout of the slices.
19495 /// \pre \p GlobalLSCost should account for at least as many loads as
19496 /// there are in the slices in \p LoadedSlices.
19497 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19498 LoadedSlice::Cost &GlobalLSCost) {
19499 unsigned NumberOfSlices = LoadedSlices.size();
19500 // If there are fewer than 2 elements, no pairing is possible.
19501 if (NumberOfSlices < 2)
19502 return;
19503
19504 // Sort the slices so that elements that are likely to be next to each
19505 // other in memory are next to each other in the list.
19506 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19507 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19508 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19509 });
19510 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19511 // First (resp. Second) is the first (resp. second) potential candidate
19512 // to be placed in a paired load.
19513 const LoadedSlice *First = nullptr;
19514 const LoadedSlice *Second = nullptr;
19515 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19516 // Set the beginning of the pair.
19517 First = Second) {
19518 Second = &LoadedSlices[CurrSlice];
19519
19520 // If First is NULL, it means we start a new pair.
19521 // Get to the next slice.
19522 if (!First)
19523 continue;
19524
19525 EVT LoadedType = First->getLoadedType();
19526
19527 // If the types of the slices are different, we cannot pair them.
19528 if (LoadedType != Second->getLoadedType())
19529 continue;
19530
19531 // Check if the target supplies paired loads for this type.
19532 Align RequiredAlignment;
19533 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19534 // move to the next pair, this type is hopeless.
19535 Second = nullptr;
19536 continue;
19537 }
19538 // Check if we meet the alignment requirement.
19539 if (First->getAlign() < RequiredAlignment)
19540 continue;
19541
19542 // Check that both loads are next to each other in memory.
19543 if (!areSlicesNextToEachOther(*First, *Second))
19544 continue;
19545
19546 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19547 --GlobalLSCost.Loads;
19548 // Move to the next pair.
19549 Second = nullptr;
19550 }
19551}
19552
19553/// Check the profitability of all involved LoadedSlice.
19554 /// Currently, it is considered profitable if there are exactly two
19555/// involved slices (1) which are (2) next to each other in memory, and
19556/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19557///
19558/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19559/// the elements themselves.
19560///
19561/// FIXME: When the cost model will be mature enough, we can relax
19562/// constraints (1) and (2).
19563 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19564 const APInt &UsedBits, bool ForCodeSize) {
19565 unsigned NumberOfSlices = LoadedSlices.size();
19566 if (StressLoadSlicing)
19567 return NumberOfSlices > 1;
19568
19569 // Check (1).
19570 if (NumberOfSlices != 2)
19571 return false;
19572
19573 // Check (2).
19574 if (!areUsedBitsDense(UsedBits))
19575 return false;
19576
19577 // Check (3).
19578 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19579 // The original code has one big load.
19580 OrigCost.Loads = 1;
19581 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19582 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19583 // Accumulate the cost of all the slices.
19584 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19585 GlobalSlicingCost += SliceCost;
19586
19587 // Account for the gain obtained with the current slices as a cost
19588 // in the original configuration.
19589 OrigCost.addSliceGain(LS);
19590 }
19591
19592 // If the target supports paired load, adjust the cost accordingly.
19593 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19594 return OrigCost > GlobalSlicingCost;
19595}
19596
19597 /// If the given load, \p N, is used only by trunc or trunc(lshr)
19598/// operations, split it in the various pieces being extracted.
19599///
19600/// This sort of thing is introduced by SROA.
19601/// This slicing takes care not to insert overlapping loads.
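/// For example, an i32 load whose only uses are (trunc ... to i16) and
/// (trunc (srl ..., 16) to i16) is rewritten as two independent i16 loads.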
19602 /// \pre \p N is a simple load (i.e., not an atomic or volatile load).
19603bool DAGCombiner::SliceUpLoad(SDNode *N) {
19604 if (Level < AfterLegalizeDAG)
19605 return false;
19606
19607 LoadSDNode *LD = cast<LoadSDNode>(N);
19608 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19609 !LD->getValueType(0).isInteger())
19610 return false;
19611
19612 // The algorithm to split up a load of a scalable vector into individual
19613 // elements currently requires knowing the length of the loaded type,
19614 // so will need adjusting to work on scalable vectors.
19615 if (LD->getValueType(0).isScalableVector())
19616 return false;
19617
19618 // Keep track of already used bits to detect overlapping values.
19619 // In that case, we will just abort the transformation.
19620 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19621
19622 SmallVector<LoadedSlice, 4> LoadedSlices;
19623
19624 // Check if this load is used as several smaller chunks of bits.
19625 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19626 // of computation for each trunc.
19627 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19628 UI != UIEnd; ++UI) {
19629 // Skip the uses of the chain.
19630 if (UI.getUse().getResNo() != 0)
19631 continue;
19632
19633 SDNode *User = *UI;
19634 unsigned Shift = 0;
19635
19636 // Check if this is a trunc(lshr).
19637 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19638 isa<ConstantSDNode>(User->getOperand(1))) {
19639 Shift = User->getConstantOperandVal(1);
19640 User = *User->use_begin();
19641 }
19642
19643    // At this point, User is a TRUNCATE, iff we encountered trunc or
19644    // trunc(lshr).
19645 if (User->getOpcode() != ISD::TRUNCATE)
19646 return false;
19647
19648    // The width of the type must be a power of 2 and at least 8 bits.
19649 // Otherwise the load cannot be represented in LLVM IR.
19650 // Moreover, if we shifted with a non-8-bits multiple, the slice
19651 // will be across several bytes. We do not support that.
19652 unsigned Width = User->getValueSizeInBits(0);
19653 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19654 return false;
19655
19656 // Build the slice for this chain of computations.
19657 LoadedSlice LS(User, LD, Shift, &DAG);
19658 APInt CurrentUsedBits = LS.getUsedBits();
19659
19660 // Check if this slice overlaps with another.
19661 if ((CurrentUsedBits & UsedBits) != 0)
19662 return false;
19663 // Update the bits used globally.
19664 UsedBits |= CurrentUsedBits;
19665
19666 // Check if the new slice would be legal.
19667 if (!LS.isLegal())
19668 return false;
19669
19670 // Record the slice.
19671 LoadedSlices.push_back(LS);
19672 }
19673
19674 // Abort slicing if it does not seem to be profitable.
19675 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19676 return false;
19677
19678 ++SlicedLoads;
19679
19680 // Rewrite each chain to use an independent load.
19681 // By construction, each chain can be represented by a unique load.
19682
19683 // Prepare the argument for the new token factor for all the slices.
19684 SmallVector<SDValue, 8> ArgChains;
19685 for (const LoadedSlice &LS : LoadedSlices) {
19686 SDValue SliceInst = LS.loadSlice();
19687 CombineTo(LS.Inst, SliceInst, true);
19688 if (SliceInst.getOpcode() != ISD::LOAD)
19689 SliceInst = SliceInst.getOperand(0);
19690 assert(SliceInst->getOpcode() == ISD::LOAD &&
19691 "It takes more than a zext to get to the loaded slice!!");
19692 ArgChains.push_back(SliceInst.getValue(1));
19693 }
19694
19695 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19696 ArgChains);
19697 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19698 AddToWorklist(Chain.getNode());
19699 return true;
19700}
19701
19702/// Check to see if V is (and (load ptr), imm), where the load has specific
19703/// bytes cleared out. If so, return the number of bytes being masked out
19704/// and the shift amount.
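///
/// For example (illustrative): for an i32 value, (and (load Ptr), 0xFFFF00FF)
/// clears exactly one byte at byte offset 1, so the result is {1, 1}.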
19705static std::pair<unsigned, unsigned>
19706CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19707  std::pair<unsigned, unsigned> Result(0, 0);
19708
19709 // Check for the structure we're looking for.
19710 if (V->getOpcode() != ISD::AND ||
19711 !isa<ConstantSDNode>(V->getOperand(1)) ||
19712 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19713 return Result;
19714
19715 // Check the chain and pointer.
19716 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19717 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19718
19719 // This only handles simple types.
19720 if (V.getValueType() != MVT::i16 &&
19721 V.getValueType() != MVT::i32 &&
19722 V.getValueType() != MVT::i64)
19723 return Result;
19724
19725  // Check the constant mask. Invert it so that the bits being masked out
19726  // become 1 and the bits being kept become 0. Use getSExtValue so that
19727  // leading bits follow the sign bit for uniformity.
19728 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19729 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19730 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19731 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19732 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19733 if (NotMaskLZ == 64) return Result; // All zero mask.
19734
19735  // See if we have a contiguous run of bits. If so, we have 0*1+0*
19736 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19737 return Result;
19738
19739 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19740 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19741 NotMaskLZ -= 64-V.getValueSizeInBits();
19742
19743 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19744 switch (MaskedBytes) {
19745 case 1:
19746 case 2:
19747 case 4: break;
19748 default: return Result; // All one mask, or 5-byte mask.
19749 }
19750
19751  // Verify that the masked run starts at a byte offset that is a multiple of
19752  // its width, so the narrow access has the same alignment as its width.
19753 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19754
19755  // For narrowing to be valid, the load must be the memory operation
19756  // immediately preceding the store.
19757 if (LD == Chain.getNode())
19758 ; // ok.
19759 else if (Chain->getOpcode() == ISD::TokenFactor &&
19760 SDValue(LD, 1).hasOneUse()) {
19761    // LD has only 1 chain use so there are no indirect dependencies.
19762 if (!LD->isOperandOf(Chain.getNode()))
19763 return Result;
19764 } else
19765 return Result; // Fail.
19766
19767 Result.first = MaskedBytes;
19768 Result.second = NotMaskTZ/8;
19769 return Result;
19770}
19771
19772/// Check to see if IVal is something that provides a value as specified by
19773/// MaskInfo. If so, replace the specified store with a narrower store of
19774/// truncated IVal.
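///
/// Illustrative example: with MaskInfo = {1, 2} (one byte masked out at byte
/// offset 2) and IVal known to be zero outside those bits, the wide store is
/// replaced by a byte-sized store of (srl IVal, 16), truncated to i8, at
/// offset 2 from the original pointer (little-endian offsets assumed).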
19775static SDValue
19776ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19777 SDValue IVal, StoreSDNode *St,
19778 DAGCombiner *DC) {
19779 unsigned NumBytes = MaskInfo.first;
19780 unsigned ByteShift = MaskInfo.second;
19781 SelectionDAG &DAG = DC->getDAG();
19782
19783 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19784 // that uses this. If not, this is not a replacement.
19785 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19786 ByteShift*8, (ByteShift+NumBytes)*8);
19787 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19788
19789 // Check that it is legal on the target to do this. It is legal if the new
19790 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19791 // legalization. If the source type is legal, but the store type isn't, see
19792 // if we can use a truncating store.
19793 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19794 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19795 bool UseTruncStore;
19796 if (DC->isTypeLegal(VT))
19797 UseTruncStore = false;
19798 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19799 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19800 UseTruncStore = true;
19801 else
19802 return SDValue();
19803
19804 // Can't do this for indexed stores.
19805 if (St->isIndexed())
19806 return SDValue();
19807
19808 // Check that the target doesn't think this is a bad idea.
19809 if (St->getMemOperand() &&
19810 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19811 *St->getMemOperand()))
19812 return SDValue();
19813
19814 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19815 // shifted by ByteShift and truncated down to NumBytes.
19816 if (ByteShift) {
19817 SDLoc DL(IVal);
19818 IVal = DAG.getNode(
19819 ISD::SRL, DL, IVal.getValueType(), IVal,
19820 DAG.getShiftAmountConstant(ByteShift * 8, IVal.getValueType(), DL));
19821 }
19822
19823 // Figure out the offset for the store and the alignment of the access.
19824 unsigned StOffset;
19825 if (DAG.getDataLayout().isLittleEndian())
19826 StOffset = ByteShift;
19827 else
19828 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19829
19830 SDValue Ptr = St->getBasePtr();
19831 if (StOffset) {
19832 SDLoc DL(IVal);
19833    Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19834  }
19835
19836 ++OpsNarrowed;
19837 if (UseTruncStore)
19838 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19839 St->getPointerInfo().getWithOffset(StOffset),
19840 VT, St->getOriginalAlign());
19841
19842 // Truncate down to the new size.
19843 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19844
19845 return DAG
19846 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19847 St->getPointerInfo().getWithOffset(StOffset),
19848 St->getOriginalAlign());
19849}
19850
19851/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19852/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19853/// narrowing the load and store if it would end up being a win for performance
19854/// or code size.
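///
/// Illustrative example (little-endian, subject to the legality and
/// profitability checks below):
///   store (or (load p), 0x00FF0000), p
/// can be narrowed to an i8 store of 0xFF at p+2, leaving the remaining bytes
/// in memory untouched.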
19855SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19856 StoreSDNode *ST = cast<StoreSDNode>(N);
19857 if (!ST->isSimple())
19858 return SDValue();
19859
19860 SDValue Chain = ST->getChain();
19861 SDValue Value = ST->getValue();
19862 SDValue Ptr = ST->getBasePtr();
19863 EVT VT = Value.getValueType();
19864
19865 if (ST->isTruncatingStore() || VT.isVector())
19866 return SDValue();
19867
19868 unsigned Opc = Value.getOpcode();
19869
19870 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19871 !Value.hasOneUse())
19872 return SDValue();
19873
19874 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19875 // is a byte mask indicating a consecutive number of bytes, check to see if
19876 // Y is known to provide just those bytes. If so, we try to replace the
19877 // load + replace + store sequence with a single (narrower) store, which makes
19878 // the load dead.
19879  if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19880    std::pair<unsigned, unsigned> MaskedLoad;
19881 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19882 if (MaskedLoad.first)
19883 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19884 Value.getOperand(1), ST,this))
19885 return NewST;
19886
19887 // Or is commutative, so try swapping X and Y.
19888 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19889 if (MaskedLoad.first)
19890 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19891 Value.getOperand(0), ST,this))
19892 return NewST;
19893 }
19894
19895  if (!EnableReduceLoadOpStoreWidth)
19896    return SDValue();
19897
19898 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19899 return SDValue();
19900
19901 SDValue N0 = Value.getOperand(0);
19902 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19903 Chain == SDValue(N0.getNode(), 1)) {
19904 LoadSDNode *LD = cast<LoadSDNode>(N0);
19905 if (LD->getBasePtr() != Ptr ||
19906 LD->getPointerInfo().getAddrSpace() !=
19907 ST->getPointerInfo().getAddrSpace())
19908 return SDValue();
19909
19910 // Find the type to narrow it the load / op / store to.
19911 SDValue N1 = Value.getOperand(1);
19912 unsigned BitWidth = N1.getValueSizeInBits();
19913 APInt Imm = N1->getAsAPIntVal();
19914 if (Opc == ISD::AND)
19915      Imm ^= APInt::getAllOnes(BitWidth);
19916    if (Imm == 0 || Imm.isAllOnes())
19917 return SDValue();
19918 unsigned ShAmt = Imm.countr_zero();
19919 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19920 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19921 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19922 // The narrowing should be profitable, the load/store operation should be
19923 // legal (or custom) and the store size should be equal to the NewVT width.
19924 while (NewBW < BitWidth &&
19925 (NewVT.getStoreSizeInBits() != NewBW ||
19926 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19927 !TLI.isNarrowingProfitable(VT, NewVT))) {
19928 NewBW = NextPowerOf2(NewBW);
19929 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19930 }
19931 if (NewBW >= BitWidth)
19932 return SDValue();
19933
19934    // If the changed lsb does not start at a NewBW-bit boundary,
19935    // start at the previous boundary.
19936 if (ShAmt % NewBW)
19937 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19938    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19939                                   std::min(BitWidth, ShAmt + NewBW));
19940 if ((Imm & Mask) == Imm) {
19941 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19942 if (Opc == ISD::AND)
19943 NewImm ^= APInt::getAllOnes(NewBW);
19944 uint64_t PtrOff = ShAmt / 8;
19945 // For big endian targets, we need to adjust the offset to the pointer to
19946 // load the correct bytes.
19947 if (DAG.getDataLayout().isBigEndian())
19948 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19949
19950 unsigned IsFast = 0;
19951 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19952 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19953 LD->getAddressSpace(), NewAlign,
19954 LD->getMemOperand()->getFlags(), &IsFast) ||
19955 !IsFast)
19956 return SDValue();
19957
19958 SDValue NewPtr =
19959          DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19960      SDValue NewLD =
19961 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19962 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19963 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19964 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19965 DAG.getConstant(NewImm, SDLoc(Value),
19966 NewVT));
19967 SDValue NewST =
19968 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19969 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19970
19971 AddToWorklist(NewPtr.getNode());
19972 AddToWorklist(NewLD.getNode());
19973 AddToWorklist(NewVal.getNode());
19974 WorklistRemover DeadNodes(*this);
19975 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19976 ++OpsNarrowed;
19977 return NewST;
19978 }
19979 }
19980
19981 return SDValue();
19982}
19983
19984/// For a given floating point load / store pair, if the load value isn't used
19985/// by any other operations, then consider transforming the pair to integer
19986/// load / store operations if the target deems the transformation profitable.
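///
/// For example, if the target finds it profitable, the pair
///   (store (load f32 [src]) [dst])
/// may be rewritten as an i32 load from [src] followed by an i32 store
/// to [dst].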
19987SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19988 StoreSDNode *ST = cast<StoreSDNode>(N);
19989 SDValue Value = ST->getValue();
19990 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19991 Value.hasOneUse()) {
19992 LoadSDNode *LD = cast<LoadSDNode>(Value);
19993 EVT VT = LD->getMemoryVT();
19994 if (!VT.isFloatingPoint() ||
19995 VT != ST->getMemoryVT() ||
19996 LD->isNonTemporal() ||
19997 ST->isNonTemporal() ||
19998 LD->getPointerInfo().getAddrSpace() != 0 ||
19999 ST->getPointerInfo().getAddrSpace() != 0)
20000 return SDValue();
20001
20002 TypeSize VTSize = VT.getSizeInBits();
20003
20004 // We don't know the size of scalable types at compile time so we cannot
20005 // create an integer of the equivalent size.
20006 if (VTSize.isScalable())
20007 return SDValue();
20008
20009 unsigned FastLD = 0, FastST = 0;
20010 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
20011 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
20012 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
20013        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
20014        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
20015        !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20016 *LD->getMemOperand(), &FastLD) ||
20017 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
20018 *ST->getMemOperand(), &FastST) ||
20019 !FastLD || !FastST)
20020 return SDValue();
20021
20022 SDValue NewLD =
20023 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
20024 LD->getPointerInfo(), LD->getAlign());
20025
20026 SDValue NewST =
20027 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
20028 ST->getPointerInfo(), ST->getAlign());
20029
20030 AddToWorklist(NewLD.getNode());
20031 AddToWorklist(NewST.getNode());
20032 WorklistRemover DeadNodes(*this);
20033 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
20034 ++LdStFP2Int;
20035 return NewST;
20036 }
20037
20038 return SDValue();
20039}
20040
20041// This is a helper function for visitMUL to check the profitability
20042// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
20043// MulNode is the original multiply, AddNode is (add x, c1),
20044// and ConstNode is c2.
20045//
20046// If the (add x, c1) has multiple uses, we could increase
20047// the number of adds if we make this transformation.
20048// It would only be worth doing this if we can remove a
20049// multiply in the process. Check for that here.
20050// To illustrate:
20051// (A + c1) * c3
20052// (A + c2) * c3
20053// We're checking for cases where we have common "c3 * A" expressions.
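// Expanded, both become (A * c3) + (c1 * c3) and (A * c3) + (c2 * c3), so the
// "c3 * A" product is computed once and reused.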
20054bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
20055 SDValue ConstNode) {
20056 APInt Val;
20057
20058 // If the add only has one use, and the target thinks the folding is
20059 // profitable or does not lead to worse code, this would be OK to do.
20060 if (AddNode->hasOneUse() &&
20061 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
20062 return true;
20063
20064 // Walk all the users of the constant with which we're multiplying.
20065 for (SDNode *Use : ConstNode->uses()) {
20066 if (Use == MulNode) // This use is the one we're on right now. Skip it.
20067 continue;
20068
20069 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
20070 SDNode *OtherOp;
20071 SDNode *MulVar = AddNode.getOperand(0).getNode();
20072
20073 // OtherOp is what we're multiplying against the constant.
20074 if (Use->getOperand(0) == ConstNode)
20075 OtherOp = Use->getOperand(1).getNode();
20076 else
20077 OtherOp = Use->getOperand(0).getNode();
20078
20079 // Check to see if multiply is with the same operand of our "add".
20080 //
20081 // ConstNode = CONST
20082 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
20083 // ...
20084 // AddNode = (A + c1) <-- MulVar is A.
20085 // = AddNode * ConstNode <-- current visiting instruction.
20086 //
20087 // If we make this transformation, we will have a common
20088 // multiply (ConstNode * A) that we can save.
20089 if (OtherOp == MulVar)
20090 return true;
20091
20092 // Now check to see if a future expansion will give us a common
20093 // multiply.
20094 //
20095 // ConstNode = CONST
20096 // AddNode = (A + c1)
20097 // ... = AddNode * ConstNode <-- current visiting instruction.
20098 // ...
20099 // OtherOp = (A + c2)
20100 // Use = OtherOp * ConstNode <-- visiting Use.
20101 //
20102 // If we make this transformation, we will have a common
20103 // multiply (CONST * A) after we also do the same transformation
20104      // to the "Use" instruction.
20105 if (OtherOp->getOpcode() == ISD::ADD &&
20106          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20107          OtherOp->getOperand(0).getNode() == MulVar)
20108 return true;
20109 }
20110 }
20111
20112 // Didn't find a case where this would be profitable.
20113 return false;
20114}
20115
20116SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20117 unsigned NumStores) {
20118  SmallVector<SDValue, 8> Chains;
20119  SmallPtrSet<const SDNode *, 8> Visited;
20120  SDLoc StoreDL(StoreNodes[0].MemNode);
20121
20122 for (unsigned i = 0; i < NumStores; ++i) {
20123 Visited.insert(StoreNodes[i].MemNode);
20124 }
20125
20126 // don't include nodes that are children or repeated nodes.
20127 for (unsigned i = 0; i < NumStores; ++i) {
20128 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20129 Chains.push_back(StoreNodes[i].MemNode->getChain());
20130 }
20131
20132 assert(!Chains.empty() && "Chain should have generated a chain");
20133 return DAG.getTokenFactor(StoreDL, Chains);
20134}
20135
20136bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20137 const Value *UnderlyingObj = nullptr;
20138 for (const auto &MemOp : StoreNodes) {
20139 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20140    // A pseudo value such as a stack frame object has its own frame index and
20141    // size; do not reuse the first store's frame index for other frames.
20142 if (MMO->getPseudoValue())
20143 return false;
20144
20145 if (!MMO->getValue())
20146 return false;
20147
20148 const Value *Obj = getUnderlyingObject(MMO->getValue());
20149
20150 if (UnderlyingObj && UnderlyingObj != Obj)
20151 return false;
20152
20153 if (!UnderlyingObj)
20154 UnderlyingObj = Obj;
20155 }
20156
20157 return true;
20158}
20159
20160bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20161 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20162 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20163 // Make sure we have something to merge.
20164 if (NumStores < 2)
20165 return false;
20166
20167 assert((!UseTrunc || !UseVector) &&
20168 "This optimization cannot emit a vector truncating store");
20169
20170 // The latest Node in the DAG.
20171 SDLoc DL(StoreNodes[0].MemNode);
20172
20173 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20174 unsigned SizeInBits = NumStores * ElementSizeBits;
20175 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20176
20177 std::optional<MachineMemOperand::Flags> Flags;
20178 AAMDNodes AAInfo;
20179 for (unsigned I = 0; I != NumStores; ++I) {
20180 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20181 if (!Flags) {
20182 Flags = St->getMemOperand()->getFlags();
20183 AAInfo = St->getAAInfo();
20184 continue;
20185 }
20186 // Skip merging if there's an inconsistent flag.
20187 if (Flags != St->getMemOperand()->getFlags())
20188 return false;
20189 // Concatenate AA metadata.
20190 AAInfo = AAInfo.concat(St->getAAInfo());
20191 }
20192
20193 EVT StoreTy;
20194 if (UseVector) {
20195 unsigned Elts = NumStores * NumMemElts;
20196 // Get the type for the merged vector store.
20197 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20198 } else
20199 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20200
20201 SDValue StoredVal;
20202 if (UseVector) {
20203 if (IsConstantSrc) {
20204 SmallVector<SDValue, 8> BuildVector;
20205 for (unsigned I = 0; I != NumStores; ++I) {
20206 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20207 SDValue Val = St->getValue();
20208 // If constant is of the wrong type, convert it now. This comes up
20209 // when one of our stores was truncating.
20210 if (MemVT != Val.getValueType()) {
20211 Val = peekThroughBitcasts(Val);
20212 // Deal with constants of wrong size.
20213 if (ElementSizeBits != Val.getValueSizeInBits()) {
20214 auto *C = dyn_cast<ConstantSDNode>(Val);
20215 if (!C)
20216 // Not clear how to truncate FP values.
20217 // TODO: Handle truncation of build_vector constants
20218 return false;
20219
20220 EVT IntMemVT =
20221              EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20222          Val = DAG.getConstant(C->getAPIntValue()
20223 .zextOrTrunc(Val.getValueSizeInBits())
20224 .zextOrTrunc(ElementSizeBits),
20225 SDLoc(C), IntMemVT);
20226 }
20227          // Make sure the correctly sized value is bitcast to the correct type.
20228 Val = DAG.getBitcast(MemVT, Val);
20229 }
20230 BuildVector.push_back(Val);
20231 }
20232 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20233                                                : ISD::BUILD_VECTOR,
20234                              DL, StoreTy, BuildVector);
20235 } else {
20236      SmallVector<SDValue, 8> Ops;
20237      for (unsigned i = 0; i < NumStores; ++i) {
20238 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20239        SDValue Val = peekThroughBitcasts(St->getValue());
20240        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20241 // type MemVT. If the underlying value is not the correct
20242 // type, but it is an extraction of an appropriate vector we
20243 // can recast Val to be of the correct type. This may require
20244 // converting between EXTRACT_VECTOR_ELT and
20245 // EXTRACT_SUBVECTOR.
20246 if ((MemVT != Val.getValueType()) &&
20247            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20248             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20249          EVT MemVTScalarTy = MemVT.getScalarType();
20250 // We may need to add a bitcast here to get types to line up.
20251 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20252 Val = DAG.getBitcast(MemVT, Val);
20253 } else if (MemVT.isVector() &&
20254                     Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20255            Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20256 } else {
20257 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20258                                            : ISD::EXTRACT_VECTOR_ELT;
20259            SDValue Vec = Val.getOperand(0);
20260 SDValue Idx = Val.getOperand(1);
20261 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20262 }
20263 }
20264 Ops.push_back(Val);
20265 }
20266
20267 // Build the extracted vector elements back into a vector.
20268 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20269                                                : ISD::BUILD_VECTOR,
20270                              DL, StoreTy, Ops);
20271 }
20272 } else {
20273 // We should always use a vector store when merging extracted vector
20274 // elements, so this path implies a store of constants.
20275 assert(IsConstantSrc && "Merged vector elements should use vector store");
20276
20277 APInt StoreInt(SizeInBits, 0);
20278
20279 // Construct a single integer constant which is made of the smaller
20280 // constant inputs.
20281 bool IsLE = DAG.getDataLayout().isLittleEndian();
20282 for (unsigned i = 0; i < NumStores; ++i) {
20283 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20284 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20285
20286 SDValue Val = St->getValue();
20287 Val = peekThroughBitcasts(Val);
20288 StoreInt <<= ElementSizeBits;
20289 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20290 StoreInt |= C->getAPIntValue()
20291 .zextOrTrunc(ElementSizeBits)
20292 .zextOrTrunc(SizeInBits);
20293 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20294 StoreInt |= C->getValueAPF()
20295 .bitcastToAPInt()
20296 .zextOrTrunc(ElementSizeBits)
20297 .zextOrTrunc(SizeInBits);
20298 // If fp truncation is necessary give up for now.
20299 if (MemVT.getSizeInBits() != ElementSizeBits)
20300 return false;
20301 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20302                 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20303        // Not yet handled
20304 return false;
20305 } else {
20306 llvm_unreachable("Invalid constant element type");
20307 }
20308 }
20309
20310 // Create the new Load and Store operations.
20311 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20312 }
20313
20314 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20315 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20316 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20317
20318  // Make sure we use a truncating store when that is necessary for legality.
20319  // When generating the new widened store, if the first store's pointer info
20320  // cannot be reused, keep only its address space, because the widened store
20321  // can no longer be described by the original pointer info, which covers
20322  // only the narrow memory object.
20323 SDValue NewStore;
20324 if (!UseTrunc) {
20325 NewStore = DAG.getStore(
20326 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20327 CanReusePtrInfo
20328 ? FirstInChain->getPointerInfo()
20329 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20330 FirstInChain->getAlign(), *Flags, AAInfo);
20331 } else { // Must be realized as a trunc store
20332 EVT LegalizedStoredValTy =
20333 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20334 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20335 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20336 SDValue ExtendedStoreVal =
20337 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20338 LegalizedStoredValTy);
20339 NewStore = DAG.getTruncStore(
20340 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20341 CanReusePtrInfo
20342 ? FirstInChain->getPointerInfo()
20343 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20344 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20345 AAInfo);
20346 }
20347
20348 // Replace all merged stores with the new store.
20349 for (unsigned i = 0; i < NumStores; ++i)
20350 CombineTo(StoreNodes[i].MemNode, NewStore);
20351
20352 AddToWorklist(NewChain.getNode());
20353 return true;
20354}
20355
20356void DAGCombiner::getStoreMergeCandidates(
20357 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20358 SDNode *&RootNode) {
20359 // This holds the base pointer, index, and the offset in bytes from the base
20360 // pointer. We must have a base and an offset. Do not handle stores to undef
20361 // base pointers.
20362  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20363  if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20364 return;
20365
20366  SDValue Val = peekThroughBitcasts(St->getValue());
20367  StoreSource StoreSrc = getStoreSource(Val);
20368 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20369
20370 // Match on loadbaseptr if relevant.
20371 EVT MemVT = St->getMemoryVT();
20372 BaseIndexOffset LBasePtr;
20373 EVT LoadVT;
20374 if (StoreSrc == StoreSource::Load) {
20375 auto *Ld = cast<LoadSDNode>(Val);
20376 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20377 LoadVT = Ld->getMemoryVT();
20378 // Load and store should be the same type.
20379 if (MemVT != LoadVT)
20380 return;
20381 // Loads must only have one use.
20382 if (!Ld->hasNUsesOfValue(1, 0))
20383 return;
20384 // The memory operands must not be volatile/indexed/atomic.
20385 // TODO: May be able to relax for unordered atomics (see D66309)
20386 if (!Ld->isSimple() || Ld->isIndexed())
20387 return;
20388 }
20389 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20390 int64_t &Offset) -> bool {
20391 // The memory operands must not be volatile/indexed/atomic.
20392 // TODO: May be able to relax for unordered atomics (see D66309)
20393 if (!Other->isSimple() || Other->isIndexed())
20394 return false;
20395 // Don't mix temporal stores with non-temporal stores.
20396 if (St->isNonTemporal() != Other->isNonTemporal())
20397 return false;
20398    if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20399      return false;
20400 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20401 // Allow merging constants of different types as integers.
20402 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20403 : Other->getMemoryVT() != MemVT;
20404 switch (StoreSrc) {
20405 case StoreSource::Load: {
20406 if (NoTypeMatch)
20407 return false;
20408 // The Load's Base Ptr must also match.
20409 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20410 if (!OtherLd)
20411 return false;
20412 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20413 if (LoadVT != OtherLd->getMemoryVT())
20414 return false;
20415 // Loads must only have one use.
20416 if (!OtherLd->hasNUsesOfValue(1, 0))
20417 return false;
20418 // The memory operands must not be volatile/indexed/atomic.
20419 // TODO: May be able to relax for unordered atomics (see D66309)
20420 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20421 return false;
20422 // Don't mix temporal loads with non-temporal loads.
20423 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20424 return false;
20425 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20426 *OtherLd))
20427 return false;
20428 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20429 return false;
20430 break;
20431 }
20432 case StoreSource::Constant:
20433 if (NoTypeMatch)
20434 return false;
20435 if (getStoreSource(OtherBC) != StoreSource::Constant)
20436 return false;
20437 break;
20438 case StoreSource::Extract:
20439 // Do not merge truncated stores here.
20440 if (Other->isTruncatingStore())
20441 return false;
20442 if (!MemVT.bitsEq(OtherBC.getValueType()))
20443 return false;
20444 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20445 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20446 return false;
20447 break;
20448 default:
20449 llvm_unreachable("Unhandled store source for merging");
20450 }
20451    Ptr = BaseIndexOffset::match(Other, DAG);
20452    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20453 };
20454
20455  // Check whether this pair of StoreNode and RootNode has already bailed out
20456  // of the dependence check more times than the limit allows.
20457 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20458 SDNode *RootNode) -> bool {
20459 auto RootCount = StoreRootCountMap.find(StoreNode);
20460 return RootCount != StoreRootCountMap.end() &&
20461 RootCount->second.first == RootNode &&
20462 RootCount->second.second > StoreMergeDependenceLimit;
20463 };
20464
20465 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20466 // This must be a chain use.
20467 if (UseIter.getOperandNo() != 0)
20468 return;
20469 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20470      BaseIndexOffset Ptr;
20471      int64_t PtrDiff;
20472 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20473 !OverLimitInDependenceCheck(OtherStore, RootNode))
20474 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20475 }
20476 };
20477
20478  // We are looking for a root node which is an ancestor to all mergeable
20479  // stores. We search up through a load, to our root and then down
20480  // through all children. For instance we will find Store{1,2,3} if
20481  // St is Store1, Store2, or Store3 where the root is not a load,
20482  // which is always true for non-volatile ops. TODO: Expand
20483  // the search to find all valid candidates through multiple layers of loads.
20484 //
20485 // Root
20486 // |-------|-------|
20487 // Load Load Store3
20488 // | |
20489 // Store1 Store2
20490 //
20491 // FIXME: We should be able to climb and
20492 // descend TokenFactors to find candidates as well.
20493
20494 RootNode = St->getChain().getNode();
20495
20496 unsigned NumNodesExplored = 0;
20497 const unsigned MaxSearchNodes = 1024;
20498 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20499 RootNode = Ldn->getChain().getNode();
20500 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20501 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20502 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20503 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20504 TryToAddCandidate(I2);
20505 }
20506 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20507 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20508 TryToAddCandidate(I);
20509 }
20510 }
20511 } else {
20512 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20513 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20514 TryToAddCandidate(I);
20515 }
20516}
20517
20518// We need to check that merging these stores does not cause a loop in the
20519// DAG. Any store candidate may depend on another candidate indirectly through
20520// its operands. Check in parallel by searching up from operands of candidates.
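// For example, a cycle would arise if one candidate's stored value is a load
// whose chain (possibly through a TokenFactor) depends on another candidate
// store: merging them would make the merged store a predecessor of itself.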
20521bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20522 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20523 SDNode *RootNode) {
20524  // FIXME: We should be able to truncate a full search of
20525  // predecessors by doing a BFS and keeping track of the originating
20526  // stores from which worklist nodes come, in a similar way to
20527  // TokenFactor simplification.
20528
20529  SmallPtrSet<const SDNode *, 32> Visited;
20530  SmallVector<const SDNode *, 8> Worklist;
20531
20532 // RootNode is a predecessor to all candidates so we need not search
20533 // past it. Add RootNode (peeking through TokenFactors). Do not count
20534 // these towards size check.
20535
20536 Worklist.push_back(RootNode);
20537 while (!Worklist.empty()) {
20538 auto N = Worklist.pop_back_val();
20539 if (!Visited.insert(N).second)
20540 continue; // Already present in Visited.
20541 if (N->getOpcode() == ISD::TokenFactor) {
20542 for (SDValue Op : N->ops())
20543 Worklist.push_back(Op.getNode());
20544 }
20545 }
20546
20547 // Don't count pruning nodes towards max.
20548 unsigned int Max = 1024 + Visited.size();
20549 // Search Ops of store candidates.
20550 for (unsigned i = 0; i < NumStores; ++i) {
20551 SDNode *N = StoreNodes[i].MemNode;
20552 // Of the 4 Store Operands:
20553 // * Chain (Op 0) -> We have already considered these
20554 // in candidate selection, but only by following the
20555 // chain dependencies. We could still have a chain
20556 // dependency to a load, that has a non-chain dep to
20557 // another load, that depends on a store, etc. So it is
20558 // possible to have dependencies that consist of a mix
20559 // of chain and non-chain deps, and we need to include
20560    //                   chain operands in the analysis here.
20561 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20562 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20563    //                    but aren't necessarily from the same base node, so
20564 // cycles possible (e.g. via indexed store).
20565 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20566 // non-indexed stores). Not constant on all targets (e.g. ARM)
20567 // and so can participate in a cycle.
20568 for (const SDValue &Op : N->op_values())
20569 Worklist.push_back(Op.getNode());
20570 }
20571 // Search through DAG. We can stop early if we find a store node.
20572 for (unsigned i = 0; i < NumStores; ++i)
20573 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20574 Max)) {
20575      // If the search bails out, record the StoreNode and RootNode in the
20576      // StoreRootCountMap. If we have seen the pair many times over a limit,
20577      // we won't add the StoreNode into the StoreNodes set again.
20578 if (Visited.size() >= Max) {
20579 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20580 if (RootCount.first == RootNode)
20581 RootCount.second++;
20582 else
20583 RootCount = {RootNode, 1};
20584 }
20585 return false;
20586 }
20587 return true;
20588}
20589
20590unsigned
20591DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20592 int64_t ElementSizeBytes) const {
20593 while (true) {
20594 // Find a store past the width of the first store.
20595 size_t StartIdx = 0;
20596 while ((StartIdx + 1 < StoreNodes.size()) &&
20597 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20598 StoreNodes[StartIdx + 1].OffsetFromBase)
20599 ++StartIdx;
20600
20601 // Bail if we don't have enough candidates to merge.
20602 if (StartIdx + 1 >= StoreNodes.size())
20603 return 0;
20604
20605 // Trim stores that overlapped with the first store.
20606 if (StartIdx)
20607 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20608
20609 // Scan the memory operations on the chain and find the first
20610 // non-consecutive store memory address.
20611 unsigned NumConsecutiveStores = 1;
20612 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20613 // Check that the addresses are consecutive starting from the second
20614 // element in the list of stores.
20615 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20616 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20617 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20618 break;
20619 NumConsecutiveStores = i + 1;
20620 }
20621 if (NumConsecutiveStores > 1)
20622 return NumConsecutiveStores;
20623
20624 // There are no consecutive stores at the start of the list.
20625 // Remove the first store and try again.
20626 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20627 }
20628}
20629
20630bool DAGCombiner::tryStoreMergeOfConstants(
20631 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20632 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20633 LLVMContext &Context = *DAG.getContext();
20634 const DataLayout &DL = DAG.getDataLayout();
20635 int64_t ElementSizeBytes = MemVT.getStoreSize();
20636 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20637 bool MadeChange = false;
20638
20639 // Store the constants into memory as one consecutive store.
20640 while (NumConsecutiveStores >= 2) {
20641 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20642 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20643 Align FirstStoreAlign = FirstInChain->getAlign();
20644 unsigned LastLegalType = 1;
20645 unsigned LastLegalVectorType = 1;
20646 bool LastIntegerTrunc = false;
20647 bool NonZero = false;
20648 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20649 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20650 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20651 SDValue StoredVal = ST->getValue();
20652 bool IsElementZero = false;
20653 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20654 IsElementZero = C->isZero();
20655 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20656 IsElementZero = C->getConstantFPValue()->isNullValue();
20657 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20658 IsElementZero = true;
20659 if (IsElementZero) {
20660 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20661 FirstZeroAfterNonZero = i;
20662 }
20663 NonZero |= !IsElementZero;
20664
20665 // Find a legal type for the constant store.
20666 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20667 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20668 unsigned IsFast = 0;
20669
20670 // Break early when size is too large to be legal.
20671 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20672 break;
20673
20674 if (TLI.isTypeLegal(StoreTy) &&
20675 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20676 DAG.getMachineFunction()) &&
20677 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20678 *FirstInChain->getMemOperand(), &IsFast) &&
20679 IsFast) {
20680 LastIntegerTrunc = false;
20681 LastLegalType = i + 1;
20682 // Or check whether a truncstore is legal.
20683 } else if (TLI.getTypeAction(Context, StoreTy) ==
20684                   TargetLowering::TypePromoteInteger) {
20685        EVT LegalizedStoredValTy =
20686 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20687 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20688 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20689 DAG.getMachineFunction()) &&
20690 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20691 *FirstInChain->getMemOperand(), &IsFast) &&
20692 IsFast) {
20693 LastIntegerTrunc = true;
20694 LastLegalType = i + 1;
20695 }
20696 }
20697
20698 // We only use vectors if the target allows it and the function is not
20699 // marked with the noimplicitfloat attribute.
20700 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20701 AllowVectors) {
20702 // Find a legal type for the vector store.
20703 unsigned Elts = (i + 1) * NumMemElts;
20704 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20705 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20706 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20707 TLI.allowsMemoryAccess(Context, DL, Ty,
20708 *FirstInChain->getMemOperand(), &IsFast) &&
20709 IsFast)
20710 LastLegalVectorType = i + 1;
20711 }
20712 }
20713
20714 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20715 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20716 bool UseTrunc = LastIntegerTrunc && !UseVector;
20717
20718 // Check if we found a legal integer type that creates a meaningful
20719 // merge.
20720 if (NumElem < 2) {
20721 // We know that candidate stores are in order and of correct
20722 // shape. While there is no mergeable sequence from the
20723 // beginning one may start later in the sequence. The only
20724 // reason a merge of size N could have failed where another of
20725 // the same size would not have, is if the alignment has
20726 // improved or we've dropped a non-zero value. Drop as many
20727 // candidates as we can here.
20728 unsigned NumSkip = 1;
20729 while ((NumSkip < NumConsecutiveStores) &&
20730 (NumSkip < FirstZeroAfterNonZero) &&
20731 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20732 NumSkip++;
20733
20734 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20735 NumConsecutiveStores -= NumSkip;
20736 continue;
20737 }
20738
20739 // Check that we can merge these candidates without causing a cycle.
20740 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20741 RootNode)) {
20742 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20743 NumConsecutiveStores -= NumElem;
20744 continue;
20745 }
20746
20747 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20748 /*IsConstantSrc*/ true,
20749 UseVector, UseTrunc);
20750
20751 // Remove merged stores for next iteration.
20752 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20753 NumConsecutiveStores -= NumElem;
20754 }
20755 return MadeChange;
20756}
20757
20758bool DAGCombiner::tryStoreMergeOfExtracts(
20759 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20760 EVT MemVT, SDNode *RootNode) {
20761 LLVMContext &Context = *DAG.getContext();
20762 const DataLayout &DL = DAG.getDataLayout();
20763 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20764 bool MadeChange = false;
20765
20766 // Loop on Consecutive Stores on success.
20767 while (NumConsecutiveStores >= 2) {
20768 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20769 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20770 Align FirstStoreAlign = FirstInChain->getAlign();
20771 unsigned NumStoresToMerge = 1;
20772 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20773 // Find a legal type for the vector store.
20774 unsigned Elts = (i + 1) * NumMemElts;
20775 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20776 unsigned IsFast = 0;
20777
20778 // Break early when size is too large to be legal.
20779 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20780 break;
20781
20782 if (TLI.isTypeLegal(Ty) &&
20783 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20784 TLI.allowsMemoryAccess(Context, DL, Ty,
20785 *FirstInChain->getMemOperand(), &IsFast) &&
20786 IsFast)
20787 NumStoresToMerge = i + 1;
20788 }
20789
20790    // Check if we found a legal vector type that creates a meaningful
20791    // merge.
20792 if (NumStoresToMerge < 2) {
20793 // We know that candidate stores are in order and of correct
20794 // shape. While there is no mergeable sequence from the
20795 // beginning one may start later in the sequence. The only
20796 // reason a merge of size N could have failed where another of
20797 // the same size would not have, is if the alignment has
20798 // improved. Drop as many candidates as we can here.
20799 unsigned NumSkip = 1;
20800 while ((NumSkip < NumConsecutiveStores) &&
20801 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20802 NumSkip++;
20803
20804 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20805 NumConsecutiveStores -= NumSkip;
20806 continue;
20807 }
20808
20809 // Check that we can merge these candidates without causing a cycle.
20810 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20811 RootNode)) {
20812 StoreNodes.erase(StoreNodes.begin(),
20813 StoreNodes.begin() + NumStoresToMerge);
20814 NumConsecutiveStores -= NumStoresToMerge;
20815 continue;
20816 }
20817
20818 MadeChange |= mergeStoresOfConstantsOrVecElts(
20819 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20820 /*UseVector*/ true, /*UseTrunc*/ false);
20821
20822 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20823 NumConsecutiveStores -= NumStoresToMerge;
20824 }
20825 return MadeChange;
20826}
20827
20828bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20829 unsigned NumConsecutiveStores, EVT MemVT,
20830 SDNode *RootNode, bool AllowVectors,
20831 bool IsNonTemporalStore,
20832 bool IsNonTemporalLoad) {
20833 LLVMContext &Context = *DAG.getContext();
20834 const DataLayout &DL = DAG.getDataLayout();
20835 int64_t ElementSizeBytes = MemVT.getStoreSize();
20836 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20837 bool MadeChange = false;
20838
20839 // Look for load nodes which are used by the stored values.
20840 SmallVector<MemOpLink, 8> LoadNodes;
20841
20842 // Find acceptable loads. Loads need to have the same chain (token factor),
20843 // must not be zext, volatile, indexed, and they must be consecutive.
20844 BaseIndexOffset LdBasePtr;
20845
20846 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20847 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20848    SDValue Val = peekThroughBitcasts(St->getValue());
20849    LoadSDNode *Ld = cast<LoadSDNode>(Val);
20850
20851 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20852 // If this is not the first ptr that we check.
20853 int64_t LdOffset = 0;
20854 if (LdBasePtr.getBase().getNode()) {
20855 // The base ptr must be the same.
20856 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20857 break;
20858 } else {
20859 // Check that all other base pointers are the same as this one.
20860 LdBasePtr = LdPtr;
20861 }
20862
20863 // We found a potential memory operand to merge.
20864 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20865 }
20866
20867 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20868 Align RequiredAlignment;
20869 bool NeedRotate = false;
20870 if (LoadNodes.size() == 2) {
20871 // If we have load/store pair instructions and we only have two values,
20872 // don't bother merging.
20873 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20874 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20875 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20876 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20877 break;
20878 }
20879 // If the loads are reversed, see if we can rotate the halves into place.
20880 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20881 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20882 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20883 if (Offset0 - Offset1 == ElementSizeBytes &&
20884 (hasOperation(ISD::ROTL, PairVT) ||
20885 hasOperation(ISD::ROTR, PairVT))) {
20886 std::swap(LoadNodes[0], LoadNodes[1]);
20887 NeedRotate = true;
20888 }
20889 }
20890 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20891 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20892 Align FirstStoreAlign = FirstInChain->getAlign();
20893 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20894
20895 // Scan the memory operations on the chain and find the first
20896 // non-consecutive load memory address. These variables hold the index in
20897 // the store node array.
20898
20899 unsigned LastConsecutiveLoad = 1;
20900
20901 // This variable refers to the size and not index in the array.
20902 unsigned LastLegalVectorType = 1;
20903 unsigned LastLegalIntegerType = 1;
20904 bool isDereferenceable = true;
20905 bool DoIntegerTruncate = false;
20906 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20907 SDValue LoadChain = FirstLoad->getChain();
20908 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20909 // All loads must share the same chain.
20910 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20911 break;
20912
20913 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20914 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20915 break;
20916 LastConsecutiveLoad = i;
20917
20918 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20919 isDereferenceable = false;
20920
20921 // Find a legal type for the vector store.
20922 unsigned Elts = (i + 1) * NumMemElts;
20923 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20924
20925 // Break early when size is too large to be legal.
20926 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20927 break;
20928
20929 unsigned IsFastSt = 0;
20930 unsigned IsFastLd = 0;
20931 // Don't try vector types if we need a rotate. We may still fail the
20932 // legality checks for the integer type, but we can't handle the rotate
20933 // case with vectors.
20934 // FIXME: We could use a shuffle in place of the rotate.
20935 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20936 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20937 DAG.getMachineFunction()) &&
20938 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20939 *FirstInChain->getMemOperand(), &IsFastSt) &&
20940 IsFastSt &&
20941 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20942 *FirstLoad->getMemOperand(), &IsFastLd) &&
20943 IsFastLd) {
20944 LastLegalVectorType = i + 1;
20945 }
20946
20947 // Find a legal type for the integer store.
20948 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20949 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20950 if (TLI.isTypeLegal(StoreTy) &&
20951 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20952 DAG.getMachineFunction()) &&
20953 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20954 *FirstInChain->getMemOperand(), &IsFastSt) &&
20955 IsFastSt &&
20956 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20957 *FirstLoad->getMemOperand(), &IsFastLd) &&
20958 IsFastLd) {
20959 LastLegalIntegerType = i + 1;
20960 DoIntegerTruncate = false;
20961 // Or check whether a truncstore and extload is legal.
20962 } else if (TLI.getTypeAction(Context, StoreTy) ==
20963                 TargetLowering::TypePromoteInteger) {
20964        EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20965 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20966 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20967 DAG.getMachineFunction()) &&
20968 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20969 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20970 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20971 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20972 *FirstInChain->getMemOperand(), &IsFastSt) &&
20973 IsFastSt &&
20974 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20975 *FirstLoad->getMemOperand(), &IsFastLd) &&
20976 IsFastLd) {
20977 LastLegalIntegerType = i + 1;
20978 DoIntegerTruncate = true;
20979 }
20980 }
20981 }
20982
20983 // Only use vector types if the vector type is larger than the integer
20984 // type. If they are the same, use integers.
20985 bool UseVectorTy =
20986 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20987 unsigned LastLegalType =
20988 std::max(LastLegalVectorType, LastLegalIntegerType);
20989
20990 // We add +1 here because the LastXXX variables refer to location while
20991 // the NumElem refers to array/index size.
20992 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20993 NumElem = std::min(LastLegalType, NumElem);
20994 Align FirstLoadAlign = FirstLoad->getAlign();
20995
20996 if (NumElem < 2) {
20997 // We know that candidate stores are in order and of correct
20998 // shape. While there is no mergeable sequence from the
20999 // beginning one may start later in the sequence. The only
21000 // reason a merge of size N could have failed where another of
21001      // the same size would not have is if the alignment of either
21002      // the load or store has improved. Drop as many candidates as we
21003 // can here.
21004 unsigned NumSkip = 1;
21005 while ((NumSkip < LoadNodes.size()) &&
21006 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
21007 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
21008 NumSkip++;
21009 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
21010 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
21011 NumConsecutiveStores -= NumSkip;
21012 continue;
21013 }
21014
21015 // Check that we can merge these candidates without causing a cycle.
21016 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
21017 RootNode)) {
21018 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21019 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21020 NumConsecutiveStores -= NumElem;
21021 continue;
21022 }
21023
21024 // Find if it is better to use vectors or integers to load and store
21025 // to memory.
21026 EVT JointMemOpVT;
21027 if (UseVectorTy) {
21028 // Find a legal type for the vector store.
21029 unsigned Elts = NumElem * NumMemElts;
21030 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
21031 } else {
21032 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
21033 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
21034 }
21035
21036 SDLoc LoadDL(LoadNodes[0].MemNode);
21037 SDLoc StoreDL(StoreNodes[0].MemNode);
21038
21039 // The merged loads are required to have the same incoming chain, so
21040 // using the first's chain is acceptable.
21041
21042 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
21043 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
21044 AddToWorklist(NewStoreChain.getNode());
21045
21046 MachineMemOperand::Flags LdMMOFlags =
21047 isDereferenceable ? MachineMemOperand::MODereferenceable
21049                          : MachineMemOperand::MONone;
21050    if (IsNonTemporalLoad)
21051      LdMMOFlags |= MachineMemOperand::MONonTemporal;
21052 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
21053
21054 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
21055                                              ? MachineMemOperand::MONonTemporal
21056                                              : MachineMemOperand::MONone;
21057
21058 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
21059
21060 SDValue NewLoad, NewStore;
21061 if (UseVectorTy || !DoIntegerTruncate) {
21062 NewLoad = DAG.getLoad(
21063 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
21064 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
21065 SDValue StoreOp = NewLoad;
21066 if (NeedRotate) {
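// Illustrative note (based on how NeedRotate is expected to be set earlier): when the two loads appear in memory in the reverse order of the stores, loading them as one wide value and rotating by half its width swaps the halves into store order.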
21067 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
21068 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
21069 "Unexpected type for rotate-able load pair");
21070 SDValue RotAmt =
21071 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
21072 // Target can convert to the identical ROTR if it does not have ROTL.
21073 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
21074 }
21075 NewStore = DAG.getStore(
21076 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
21077 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21078 : MachinePointerInfo(FirstStoreAS),
21079 FirstStoreAlign, StMMOFlags);
21080 } else { // This must be the truncstore/extload case
21081 EVT ExtendedTy =
21082 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
21083 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
21084 FirstLoad->getChain(), FirstLoad->getBasePtr(),
21085 FirstLoad->getPointerInfo(), JointMemOpVT,
21086 FirstLoadAlign, LdMMOFlags);
21087 NewStore = DAG.getTruncStore(
21088 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
21089 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21090 : MachinePointerInfo(FirstStoreAS),
21091 JointMemOpVT, FirstInChain->getAlign(),
21092 FirstInChain->getMemOperand()->getFlags());
21093 }
21094
21095 // Transfer chain users from old loads to the new load.
21096 for (unsigned i = 0; i < NumElem; ++i) {
21097 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21098 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21099 SDValue(NewLoad.getNode(), 1));
21100 }
21101
21102 // Replace all stores with the new store. Recursively remove corresponding
21103 // values if they are no longer used.
21104 for (unsigned i = 0; i < NumElem; ++i) {
21105 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21106 CombineTo(StoreNodes[i].MemNode, NewStore);
21107 if (Val->use_empty())
21108 recursivelyDeleteUnusedNodes(Val.getNode());
21109 }
21110
21111 MadeChange = true;
21112 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21113 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21114 NumConsecutiveStores -= NumElem;
21115 }
21116 return MadeChange;
21117}
21118
21119bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21120 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21121 return false;
21122
21123 // TODO: Extend this function to merge stores of scalable vectors.
21124 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21125 // store since we know <vscale x 16 x i8> is exactly twice as large as
21126 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21127 EVT MemVT = St->getMemoryVT();
21128 if (MemVT.isScalableVT())
21129 return false;
21130 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21131 return false;
21132
21133 // This function cannot currently deal with non-byte-sized memory sizes.
21134 int64_t ElementSizeBytes = MemVT.getStoreSize();
21135 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21136 return false;
21137
21138 // Do not bother looking at stored values that are not constants, loads, or
21139 // extracted vector elements.
21140 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21141 const StoreSource StoreSrc = getStoreSource(StoredVal);
21142 if (StoreSrc == StoreSource::Unknown)
21143 return false;
21144
21145 SmallVector<MemOpLink, 8> StoreNodes;
21146 SDNode *RootNode;
21147 // Find potential store merge candidates by searching through chain sub-DAG
21148 getStoreMergeCandidates(St, StoreNodes, RootNode);
21149
21150 // Check if there is anything to merge.
21151 if (StoreNodes.size() < 2)
21152 return false;
21153
21154 // Sort the memory operands according to their distance from the
21155 // base pointer.
21156 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21157 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21158 });
21159
21160 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21161 Attribute::NoImplicitFloat);
21162 bool IsNonTemporalStore = St->isNonTemporal();
21163 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21164 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21165
21166 // Store merging attempts to merge the lowest stores first. This generally
21167 // works out: if the merge succeeds, the remaining stores are checked again
21168 // after the first collection of stores is merged. However, in the
21169 // case that a non-mergeable store is found first, e.g., {p[-2],
21170 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21171 // mergeable cases. To prevent this, we prune such stores from the
21172 // front of StoreNodes here.
21173 bool MadeChange = false;
21174 while (StoreNodes.size() > 1) {
21175 unsigned NumConsecutiveStores =
21176 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21177 // There are no more stores in the list to examine.
21178 if (NumConsecutiveStores == 0)
21179 return MadeChange;
21180
21181 // We have at least 2 consecutive stores. Try to merge them.
21182 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21183 switch (StoreSrc) {
21184 case StoreSource::Constant:
21185 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21186 MemVT, RootNode, AllowVectors);
21187 break;
21188
21189 case StoreSource::Extract:
21190 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21191 MemVT, RootNode);
21192 break;
21193
21194 case StoreSource::Load:
21195 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21196 MemVT, RootNode, AllowVectors,
21197 IsNonTemporalStore, IsNonTemporalLoad);
21198 break;
21199
21200 default:
21201 llvm_unreachable("Unhandled store source type");
21202 }
21203 }
21204 return MadeChange;
21205}
21206
21207SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21208 SDLoc SL(ST);
21209 SDValue ReplStore;
21210
21211 // Replace the chain to avoid dependency.
21212 if (ST->isTruncatingStore()) {
21213 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21214 ST->getBasePtr(), ST->getMemoryVT(),
21215 ST->getMemOperand());
21216 } else {
21217 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21218 ST->getMemOperand());
21219 }
21220
21221 // Create token to keep both nodes around.
21222 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21223 MVT::Other, ST->getChain(), ReplStore);
21224
21225 // Make sure the new and old chains are cleaned up.
21226 AddToWorklist(Token.getNode());
21227
21228 // Don't add users to work list.
21229 return CombineTo(ST, Token, false);
21230}
21231
21232SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21233 SDValue Value = ST->getValue();
21234 if (Value.getOpcode() == ISD::TargetConstantFP)
21235 return SDValue();
21236
21237 if (!ISD::isNormalStore(ST))
21238 return SDValue();
21239
21240 SDLoc DL(ST);
21241
21242 SDValue Chain = ST->getChain();
21243 SDValue Ptr = ST->getBasePtr();
21244
21245 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21246
21247 // NOTE: If the original store is volatile, this transform must not increase
21248 // the number of stores. For example, on x86-32 an f64 can be stored in one
21249 // processor operation but an i64 (which is not legal) requires two. So the
21250 // transform should not be done in this case.
21251
21252 SDValue Tmp;
21253 switch (CFP->getSimpleValueType(0).SimpleTy) {
21254 default:
21255 llvm_unreachable("Unknown FP type");
21256 case MVT::f16: // We don't do this for these yet.
21257 case MVT::bf16:
21258 case MVT::f80:
21259 case MVT::f128:
21260 case MVT::ppcf128:
21261 return SDValue();
21262 case MVT::f32:
21263 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21264 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21265 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21266 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21267 MVT::i32);
21268 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21269 }
21270
21271 return SDValue();
21272 case MVT::f64:
21273 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21274 ST->isSimple()) ||
21275 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21276 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21277 getZExtValue(), SDLoc(CFP), MVT::i64);
21278 return DAG.getStore(Chain, DL, Tmp,
21279 Ptr, ST->getMemOperand());
21280 }
21281
21282 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21283 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21284 // Many FP stores are not made apparent until after legalize, e.g. for
21285 // argument passing. Since this is so common, custom legalize the
21286 // 64-bit integer store into two 32-bit stores.
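// For example, storing the f64 constant 1.0 (bit pattern 0x3FF0000000000000) becomes an i32 store of 0x00000000 at Ptr and 0x3FF00000 at Ptr+4 (with the halves swapped on big-endian targets).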
21287 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21288 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21289 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21290 if (DAG.getDataLayout().isBigEndian())
21291 std::swap(Lo, Hi);
21292
21293 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21294 AAMDNodes AAInfo = ST->getAAInfo();
21295
21296 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21297 ST->getOriginalAlign(), MMOFlags, AAInfo);
21298 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21299 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21300 ST->getPointerInfo().getWithOffset(4),
21301 ST->getOriginalAlign(), MMOFlags, AAInfo);
21302 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21303 St0, St1);
21304 }
21305
21306 return SDValue();
21307 }
21308}
21309
21310// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21311//
21312// If a store of a load with an element inserted into it has no other
21313// uses in between the chain, then we can consider the vector store
21314// dead and replace it with just the single scalar element store.
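// For example, storing a <4 x i32> value that was loaded from p and had element 2 replaced by x reduces to a single i32 store of x at p+8.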
21315SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21316 SDLoc DL(ST);
21317 SDValue Value = ST->getValue();
21318 SDValue Ptr = ST->getBasePtr();
21319 SDValue Chain = ST->getChain();
21320 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21321 return SDValue();
21322
21323 SDValue Elt = Value.getOperand(1);
21324 SDValue Idx = Value.getOperand(2);
21325
21326 // If the element isn't byte sized or is implicitly truncated then we can't
21327 // compute an offset.
21328 EVT EltVT = Elt.getValueType();
21329 if (!EltVT.isByteSized() ||
21330 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21331 return SDValue();
21332
21333 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21334 if (!Ld || Ld->getBasePtr() != Ptr ||
21335 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21336 !ISD::isNormalStore(ST) ||
21337 Ld->getAddressSpace() != ST->getAddressSpace() ||
21338 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21339 return SDValue();
21340
21341 unsigned IsFast;
21342 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21343 Elt.getValueType(), ST->getAddressSpace(),
21344 ST->getAlign(), ST->getMemOperand()->getFlags(),
21345 &IsFast) ||
21346 !IsFast)
21347 return SDValue();
21348
21349 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21350
21351 // If the offset is a known constant then try to recover the pointer
21352 // info
21353 SDValue NewPtr;
21354 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21355 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21356 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21357 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21358 } else {
21359 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21360 }
21361
21362 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21363 ST->getMemOperand()->getFlags());
21364}
21365
21366SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21367 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21368 SDValue Val = ST->getVal();
21369 EVT VT = Val.getValueType();
21370 EVT MemVT = ST->getMemoryVT();
21371
21372 if (MemVT.bitsLT(VT)) { // Is truncating store
21373 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21374 MemVT.getScalarSizeInBits());
21375 // See if we can simplify the operation with SimplifyDemandedBits, which
21376 // only works if the value has a single use.
21377 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21378 return SDValue(N, 0);
21379 }
21380
21381 return SDValue();
21382}
21383
21384SDValue DAGCombiner::visitSTORE(SDNode *N) {
21385 StoreSDNode *ST = cast<StoreSDNode>(N);
21386 SDValue Chain = ST->getChain();
21387 SDValue Value = ST->getValue();
21388 SDValue Ptr = ST->getBasePtr();
21389
21390 // If this is a store of a bit convert, store the input value if the
21391 // resultant store does not need a higher alignment than the original.
21392 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21393 ST->isUnindexed()) {
21394 EVT SVT = Value.getOperand(0).getValueType();
21395 // If the store is volatile, we only want to change the store type if the
21396 // resulting store is legal. Otherwise we might increase the number of
21397 // memory accesses. We don't care if the original type was legal or not
21398 // as we assume software couldn't rely on the number of accesses of an
21399 // illegal type.
21400 // TODO: May be able to relax for unordered atomics (see D66309)
21401 if (((!LegalOperations && ST->isSimple()) ||
21402 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21403 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21404 DAG, *ST->getMemOperand())) {
21405 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21406 ST->getMemOperand());
21407 }
21408 }
21409
21410 // Turn 'store undef, Ptr' -> nothing.
21411 if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
21412 return Chain;
21413
21414 // Try to infer better alignment information than the store already has.
21415 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21416 !ST->isAtomic()) {
21417 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21418 if (*Alignment > ST->getAlign() &&
21419 isAligned(*Alignment, ST->getSrcValueOffset())) {
21420 SDValue NewStore =
21421 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21422 ST->getMemoryVT(), *Alignment,
21423 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21424 // NewStore will always be N as we are only refining the alignment
21425 assert(NewStore.getNode() == N);
21426 (void)NewStore;
21427 }
21428 }
21429 }
21430
21431 // Try transforming a pair floating point load / store ops to integer
21432 // load / store ops.
21433 if (SDValue NewST = TransformFPLoadStorePair(N))
21434 return NewST;
21435
21436 // Try transforming several stores into STORE (BSWAP).
21437 if (SDValue Store = mergeTruncStores(ST))
21438 return Store;
21439
21440 if (ST->isUnindexed()) {
21441 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21442 // adjacent stores.
21443 if (findBetterNeighborChains(ST)) {
21444 // replaceStoreChain uses CombineTo, which handled all of the worklist
21445 // manipulation. Return the original node to not do anything else.
21446 return SDValue(ST, 0);
21447 }
21448 Chain = ST->getChain();
21449 }
21450
21451 // FIXME: is there such a thing as a truncating indexed store?
21452 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21453 Value.getValueType().isInteger() &&
21454 (!isa<ConstantSDNode>(Value) ||
21455 !cast<ConstantSDNode>(Value)->isOpaque())) {
21456 // Convert a truncating store of an extension into a standard store.
21457 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21458 Value.getOpcode() == ISD::SIGN_EXTEND ||
21459 Value.getOpcode() == ISD::ANY_EXTEND) &&
21460 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21461 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21462 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21463 ST->getMemOperand());
21464
21465 APInt TruncDemandedBits =
21466 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21467 ST->getMemoryVT().getScalarSizeInBits());
21468
21469 // See if we can simplify the operation with SimplifyDemandedBits, which
21470 // only works if the value has a single use.
21471 AddToWorklist(Value.getNode());
21472 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21473 // Re-visit the store if anything changed and the store hasn't been merged
21474 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21475 // node back to the worklist if necessary, but we also need to re-visit
21476 // the Store node itself.
21477 if (N->getOpcode() != ISD::DELETED_NODE)
21478 AddToWorklist(N);
21479 return SDValue(N, 0);
21480 }
21481
21482 // Otherwise, see if we can simplify the input to this truncstore with
21483 // knowledge that only the low bits are being used. For example:
21484 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21485 if (SDValue Shorter =
21486 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21487 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21488 ST->getMemOperand());
21489
21490 // If we're storing a truncated constant, see if we can simplify it.
21491 // TODO: Move this to targetShrinkDemandedConstant?
21492 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21493 if (!Cst->isOpaque()) {
21494 const APInt &CValue = Cst->getAPIntValue();
21495 APInt NewVal = CValue & TruncDemandedBits;
21496 if (NewVal != CValue) {
21497 SDValue Shorter =
21498 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21499 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21500 ST->getMemoryVT(), ST->getMemOperand());
21501 }
21502 }
21503 }
21504
21505 // If this is a load followed by a store to the same location, then the store
21506 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21507 // TODO: Add big-endian truncate support with test coverage.
21508 // TODO: Can relax for unordered atomics (see D66309)
21509 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21510 ? peekThroughTruncates(Value)
21511 : Value;
21512 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21513 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21514 ST->isUnindexed() && ST->isSimple() &&
21515 Ld->getAddressSpace() == ST->getAddressSpace() &&
21516 // There can't be any side effects between the load and store, such as
21517 // a call or store.
21518 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21519 // The store is dead, remove it.
21520 return Chain;
21521 }
21522 }
21523
21524 // Try scalarizing vector stores of loads where we only change one element
21525 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21526 return NewST;
21527
21528 // TODO: Can relax for unordered atomics (see D66309)
21529 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21530 if (ST->isUnindexed() && ST->isSimple() &&
21531 ST1->isUnindexed() && ST1->isSimple()) {
21532 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21533 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21534 ST->getAddressSpace() == ST1->getAddressSpace()) {
21535 // If this is a store followed by a store with the same value to the
21536 // same location, then the store is dead/noop.
21537 return Chain;
21538 }
21539
21540 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21541 !ST1->getBasePtr().isUndef() &&
21542 ST->getAddressSpace() == ST1->getAddressSpace()) {
21543 // If we consider two stores and one smaller in size is a scalable
21544 // vector type and another one a bigger size store with a fixed type,
21545 // then we could not allow the scalable store removal because we don't
21546 // know its final size in the end.
21547 if (ST->getMemoryVT().isScalableVector() ||
21548 ST1->getMemoryVT().isScalableVector()) {
21549 if (ST1->getBasePtr() == Ptr &&
21550 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21551 ST->getMemoryVT().getStoreSize())) {
21552 CombineTo(ST1, ST1->getChain());
21553 return SDValue(N, 0);
21554 }
21555 } else {
21556 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21557 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21558 // If the preceding store writes to a subset of the current store's
21559 // location and no other node is chained to that store, we can
21560 // effectively drop the preceding store. Do not remove stores to undef as
21561 // they may be used as data sinks.
21562 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21563 ChainBase,
21564 ST1->getMemoryVT().getFixedSizeInBits())) {
21565 CombineTo(ST1, ST1->getChain());
21566 return SDValue(N, 0);
21567 }
21568 }
21569 }
21570 }
21571 }
21572
21573 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21574 // truncating store. We can do this even if this is already a truncstore.
21575 if ((Value.getOpcode() == ISD::FP_ROUND ||
21576 Value.getOpcode() == ISD::TRUNCATE) &&
21577 Value->hasOneUse() && ST->isUnindexed() &&
21578 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21579 ST->getMemoryVT(), LegalOperations)) {
21580 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21581 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21582 }
21583
21584 // Always perform this optimization before types are legal. If the target
21585 // prefers, also try this after legalization to catch stores that were created
21586 // by intrinsics or other nodes.
21587 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21588 while (true) {
21589 // There can be multiple store sequences on the same chain.
21590 // Keep trying to merge store sequences until we are unable to do so
21591 // or until we merge the last store on the chain.
21592 bool Changed = mergeConsecutiveStores(ST);
21593 if (!Changed) break;
21594 // Return N as merge only uses CombineTo and no worklist clean
21595 // up is necessary.
21596 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21597 return SDValue(N, 0);
21598 }
21599 }
21600
21601 // Try transforming N to an indexed store.
21602 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21603 return SDValue(N, 0);
21604
21605 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21606 //
21607 // Make sure to do this only after attempting to merge stores in order to
21608 // avoid changing the types of some subset of stores due to visit order,
21609 // preventing their merging.
21610 if (isa<ConstantFPSDNode>(ST->getValue())) {
21611 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21612 return NewSt;
21613 }
21614
21615 if (SDValue NewSt = splitMergedValStore(ST))
21616 return NewSt;
21617
21618 return ReduceLoadOpStoreWidth(N);
21619}
21620
21621SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21622 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21623 if (!LifetimeEnd->hasOffset())
21624 return SDValue();
21625
21626 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21627 LifetimeEnd->getOffset(), false);
21628
21629 // We walk up the chains to find stores.
21630 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21631 while (!Chains.empty()) {
21632 SDValue Chain = Chains.pop_back_val();
21633 if (!Chain.hasOneUse())
21634 continue;
21635 switch (Chain.getOpcode()) {
21636 case ISD::TokenFactor:
21637 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21638 Chains.push_back(Chain.getOperand(--Nops));
21639 break;
21640 case ISD::LIFETIME_START:
21641 case ISD::LIFETIME_END:
21642 // We can forward past any lifetime start/end that can be proven not to
21643 // alias the node.
21644 if (!mayAlias(Chain.getNode(), N))
21645 Chains.push_back(Chain.getOperand(0));
21646 break;
21647 case ISD::STORE: {
21648 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21649 // TODO: Can relax for unordered atomics (see D66309)
21650 if (!ST->isSimple() || ST->isIndexed())
21651 continue;
21652 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21653 // The bounds of a scalable store are not known until runtime, so this
21654 // store cannot be elided.
21655 if (StoreSize.isScalable())
21656 continue;
21657 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21658 // If we store purely within object bounds just before its lifetime ends,
21659 // we can remove the store.
21660 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21661 StoreSize.getFixedValue() * 8)) {
21662 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21663 dbgs() << "\nwithin LIFETIME_END of : ";
21664 LifetimeEndBase.dump(); dbgs() << "\n");
21665 CombineTo(ST, ST->getChain());
21666 return SDValue(N, 0);
21667 }
21668 }
21669 }
21670 }
21671 return SDValue();
21672}
21673
21674/// For the instruction sequence of store below, F and I values
21675/// are bundled together as an i64 value before being stored into memory.
21676 /// Sometimes it is more efficient to generate separate stores for F and I,
21677/// which can remove the bitwise instructions or sink them to colder places.
21678///
21679/// (store (or (zext (bitcast F to i32) to i64),
21680/// (shl (zext I to i64), 32)), addr) -->
21681/// (store F, addr) and (store I, addr+4)
21682///
21683/// Similarly, splitting for other merged store can also be beneficial, like:
21684/// For pair of {i32, i32}, i64 store --> two i32 stores.
21685/// For pair of {i32, i16}, i64 store --> two i32 stores.
21686/// For pair of {i16, i16}, i32 store --> two i16 stores.
21687/// For pair of {i16, i8}, i32 store --> two i16 stores.
21688/// For pair of {i8, i8}, i16 store --> two i8 stores.
21689///
21690/// We allow each target to determine specifically which kind of splitting is
21691/// supported.
21692///
21693/// The store patterns are commonly seen from the simple code snippet below
21694 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21695/// void goo(const std::pair<int, float> &);
21696/// hoo() {
21697/// ...
21698/// goo(std::make_pair(tmp, ftmp));
21699/// ...
21700/// }
21701///
21702SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21703 if (OptLevel == CodeGenOptLevel::None)
21704 return SDValue();
21705
21706 // Can't change the number of memory accesses for a volatile store or break
21707 // atomicity for an atomic one.
21708 if (!ST->isSimple())
21709 return SDValue();
21710
21711 SDValue Val = ST->getValue();
21712 SDLoc DL(ST);
21713
21714 // Match OR operand.
21715 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21716 return SDValue();
21717
21718 // Match SHL operand and get Lower and Higher parts of Val.
21719 SDValue Op1 = Val.getOperand(0);
21720 SDValue Op2 = Val.getOperand(1);
21721 SDValue Lo, Hi;
21722 if (Op1.getOpcode() != ISD::SHL) {
21723 std::swap(Op1, Op2);
21724 if (Op1.getOpcode() != ISD::SHL)
21725 return SDValue();
21726 }
21727 Lo = Op2;
21728 Hi = Op1.getOperand(0);
21729 if (!Op1.hasOneUse())
21730 return SDValue();
21731
21732 // Match shift amount to HalfValBitSize.
21733 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21734 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21735 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21736 return SDValue();
21737
21738 // Lo and Hi must be zero-extended from scalar integer types no wider than
21739 // HalfValBitSize.
21740 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21741 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21742 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21743 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21744 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21745 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21746 return SDValue();
21747
21748 // Use the EVT of low and high parts before bitcast as the input
21749 // of target query.
21750 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21751 ? Lo.getOperand(0).getValueType()
21752 : Lo.getValueType();
21753 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21754 ? Hi.getOperand(0).getValueType()
21755 : Hi.getValueType();
21756 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21757 return SDValue();
21758
21759 // Start to split store.
21760 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21761 AAMDNodes AAInfo = ST->getAAInfo();
21762
21763 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21764 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21765 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21766 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21767
21768 SDValue Chain = ST->getChain();
21769 SDValue Ptr = ST->getBasePtr();
21770 // Lower value store.
21771 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21772 ST->getOriginalAlign(), MMOFlags, AAInfo);
21773 Ptr =
21774 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21775 // Higher value store.
21776 SDValue St1 = DAG.getStore(
21777 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21778 ST->getOriginalAlign(), MMOFlags, AAInfo);
21779 return St1;
21780}
21781
21782// Merge an insertion into an existing shuffle:
21783// (insert_vector_elt (vector_shuffle X, Y, Mask),
21784// .(extract_vector_elt X, N), InsIndex)
21785// --> (vector_shuffle X, Y, NewMask)
21786// and variations where shuffle operands may be CONCAT_VECTORS.
21787 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21788 SmallVectorImpl<int> &NewMask, SDValue Elt,
21789 unsigned InsIndex) {
21790 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21791 !isa<ConstantSDNode>(Elt.getOperand(1)))
21792 return false;
21793
21794 // Vec's operand 0 is using indices from 0 to N-1 and
21795 // operand 1 from N to 2N - 1, where N is the number of
21796 // elements in the vectors.
21797 SDValue InsertVal0 = Elt.getOperand(0);
21798 int ElementOffset = -1;
21799
21800 // We explore the inputs of the shuffle in order to see if we find the
21801 // source of the extract_vector_elt. If so, we can use it to modify the
21802 // shuffle rather than perform an insert_vector_elt.
21803 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
21804 ArgWorkList.emplace_back(Mask.size(), Y);
21805 ArgWorkList.emplace_back(0, X);
21806
21807 while (!ArgWorkList.empty()) {
21808 int ArgOffset;
21809 SDValue ArgVal;
21810 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21811
21812 if (ArgVal == InsertVal0) {
21813 ElementOffset = ArgOffset;
21814 break;
21815 }
21816
21817 // Peek through concat_vector.
21818 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21819 int CurrentArgOffset =
21820 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21821 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21822 for (SDValue Op : reverse(ArgVal->ops())) {
21823 CurrentArgOffset -= Step;
21824 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21825 }
21826
21827 // Make sure we went through all the elements and did not screw up index
21828 // computation.
21829 assert(CurrentArgOffset == ArgOffset);
21830 }
21831 }
21832
21833 // If we failed to find a match, see if we can replace an UNDEF shuffle
21834 // operand.
21835 if (ElementOffset == -1) {
21836 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21837 return false;
21838 ElementOffset = Mask.size();
21839 Y = InsertVal0;
21840 }
21841
21842 NewMask.assign(Mask.begin(), Mask.end());
21843 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21844 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21845 "NewMask[InsIndex] is out of bound");
21846 return true;
21847}
21848
21849// Merge an insertion into an existing shuffle:
21850// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21851// InsIndex)
21852// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21853// CONCAT_VECTORS.
21854SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21855 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21856 "Expected extract_vector_elt");
21857 SDValue InsertVal = N->getOperand(1);
21858 SDValue Vec = N->getOperand(0);
21859
21860 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21861 if (!SVN || !Vec.hasOneUse())
21862 return SDValue();
21863
21864 ArrayRef<int> Mask = SVN->getMask();
21865 SDValue X = Vec.getOperand(0);
21866 SDValue Y = Vec.getOperand(1);
21867
21868 SmallVector<int, 16> NewMask(Mask);
21869 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21870 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21871 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21872 if (LegalShuffle)
21873 return LegalShuffle;
21874 }
21875
21876 return SDValue();
21877}
21878
21879// Convert a disguised subvector insertion into a shuffle:
21880// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21881// bitcast(shuffle (bitcast V), (extended X), Mask)
21882// Note: We do not use an insert_subvector node because that requires a
21883// legal subvector type.
21884SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21885 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21886 "Expected extract_vector_elt");
21887 SDValue InsertVal = N->getOperand(1);
21888
21889 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21890 !InsertVal.getOperand(0).getValueType().isVector())
21891 return SDValue();
21892
21893 SDValue SubVec = InsertVal.getOperand(0);
21894 SDValue DestVec = N->getOperand(0);
21895 EVT SubVecVT = SubVec.getValueType();
21896 EVT VT = DestVec.getValueType();
21897 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21898 // If the source only has a single vector element, the cost of creating a
21899 // vector from it is likely to exceed the cost of an insert_vector_elt.
21900 if (NumSrcElts == 1)
21901 return SDValue();
21902 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21903 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21904
21905 // Step 1: Create a shuffle mask that implements this insert operation. The
21906 // vector that we are inserting into will be operand 0 of the shuffle, so
21907 // those elements are just 'i'. The inserted subvector is in the first
21908 // positions of operand 1 of the shuffle. Example:
21909 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21910 SmallVector<int, 16> Mask(NumMaskVals);
21911 for (unsigned i = 0; i != NumMaskVals; ++i) {
21912 if (i / NumSrcElts == InsIndex)
21913 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21914 else
21915 Mask[i] = i;
21916 }
21917
21918 // Bail out if the target can not handle the shuffle we want to create.
21919 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21920 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21921 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21922 return SDValue();
21923
21924 // Step 2: Create a wide vector from the inserted source vector by appending
21925 // undefined elements. This is the same size as our destination vector.
21926 SDLoc DL(N);
21927 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21928 ConcatOps[0] = SubVec;
21929 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21930
21931 // Step 3: Shuffle in the padded subvector.
21932 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21933 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21934 AddToWorklist(PaddedSubV.getNode());
21935 AddToWorklist(DestVecBC.getNode());
21936 AddToWorklist(Shuf.getNode());
21937 return DAG.getBitcast(VT, Shuf);
21938}
21939
21940// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21941// possible and the new load will be quick. We use more loads but less shuffles
21942// and inserts.
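// For example (assuming i32 elements): inserting a scalar loaded from p-4 into lane 0 of (shuffle (load <4 x i32> from p), <u,0,1,2>) can become a single <4 x i32> load from p-4.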
21943SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21944 EVT VT = N->getValueType(0);
21945
21946 // InsIndex is expected to be the first or last lane.
21947 if (!VT.isFixedLengthVector() ||
21948 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21949 return SDValue();
21950
21951 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21952 // depending on the InsIndex.
21953 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21954 SDValue Scalar = N->getOperand(1);
21955 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21956 return InsIndex == P.index() || P.value() < 0 ||
21957 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21958 (InsIndex == VT.getVectorNumElements() - 1 &&
21959 P.value() == (int)P.index() + 1);
21960 }))
21961 return SDValue();
21962
21963 // We optionally skip over an extend so long as both loads are extended in the
21964 // same way from the same type.
21965 unsigned Extend = 0;
21966 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21967 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21968 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21969 Extend = Scalar.getOpcode();
21970 Scalar = Scalar.getOperand(0);
21971 }
21972
21973 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21974 if (!ScalarLoad)
21975 return SDValue();
21976
21977 SDValue Vec = Shuffle->getOperand(0);
21978 if (Extend) {
21979 if (Vec.getOpcode() != Extend)
21980 return SDValue();
21981 Vec = Vec.getOperand(0);
21982 }
21983 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21984 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21985 return SDValue();
21986
21987 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21988 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21989 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21990 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21991 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21992 return SDValue();
21993
21994 // Check that the offset between the pointers would produce a single
21995 // contiguous load.
21996 if (InsIndex == 0) {
21997 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21998 -1))
21999 return SDValue();
22000 } else {
22001 if (!DAG.areNonVolatileConsecutiveLoads(
22002 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
22003 return SDValue();
22004 }
22005
22006 // And that the new unaligned load will be fast.
22007 unsigned IsFast = 0;
22008 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
22009 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
22010 Vec.getValueType(), VecLoad->getAddressSpace(),
22011 NewAlign, VecLoad->getMemOperand()->getFlags(),
22012 &IsFast) ||
22013 !IsFast)
22014 return SDValue();
22015
22016 // Calculate the new Ptr and create the new load.
22017 SDLoc DL(N);
22018 SDValue Ptr = ScalarLoad->getBasePtr();
22019 if (InsIndex != 0)
22020 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
22021 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
22022 MachinePointerInfo PtrInfo =
22023 InsIndex == 0 ? ScalarLoad->getPointerInfo()
22024 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
22025
22026 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
22027 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
22028 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
22029 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
22030 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
22031}
22032
22033SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
22034 SDValue InVec = N->getOperand(0);
22035 SDValue InVal = N->getOperand(1);
22036 SDValue EltNo = N->getOperand(2);
22037 SDLoc DL(N);
22038
22039 EVT VT = InVec.getValueType();
22040 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
22041
22042 // Insert into out-of-bounds element is undefined.
22043 if (IndexC && VT.isFixedLengthVector() &&
22044 IndexC->getZExtValue() >= VT.getVectorNumElements())
22045 return DAG.getUNDEF(VT);
22046
22047 // Remove redundant insertions:
22048 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
22049 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22050 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
22051 return InVec;
22052
22053 if (!IndexC) {
22054 // If this is variable insert to undef vector, it might be better to splat:
22055 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
22056 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
22057 return DAG.getSplat(VT, DL, InVal);
22058 return SDValue();
22059 }
22060
22061 if (VT.isScalableVector())
22062 return SDValue();
22063
22064 unsigned NumElts = VT.getVectorNumElements();
22065
22066 // We must know which element is being inserted for folds below here.
22067 unsigned Elt = IndexC->getZExtValue();
22068
22069 // Handle <1 x ???> vector insertion special cases.
22070 if (NumElts == 1) {
22071 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
22072 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22073 InVal.getOperand(0).getValueType() == VT &&
22074 isNullConstant(InVal.getOperand(1)))
22075 return InVal.getOperand(0);
22076 }
22077
22078 // Canonicalize insert_vector_elt dag nodes.
22079 // Example:
22080 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
22081 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
22082 //
22083 // Do this only if the child insert_vector node has one use; also
22084 // do this only if indices are both constants and Idx1 < Idx0.
22085 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
22086 && isa<ConstantSDNode>(InVec.getOperand(2))) {
22087 unsigned OtherElt = InVec.getConstantOperandVal(2);
22088 if (Elt < OtherElt) {
22089 // Swap nodes.
22090 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22091 InVec.getOperand(0), InVal, EltNo);
22092 AddToWorklist(NewOp.getNode());
22093 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22094 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22095 }
22096 }
22097
22098 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22099 return Shuf;
22100
22101 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22102 return Shuf;
22103
22104 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22105 return Shuf;
22106
22107 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
22108 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22109 // vXi1 vector - we don't need to recurse.
22110 if (NumElts == 1)
22111 return DAG.getBuildVector(VT, DL, {InVal});
22112
22113 // If we haven't already collected the element, insert into the op list.
22114 EVT MaxEltVT = InVal.getValueType();
22115 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22116 unsigned Idx) {
22117 if (!Ops[Idx]) {
22118 Ops[Idx] = Elt;
22119 if (VT.isInteger()) {
22120 EVT EltVT = Elt.getValueType();
22121 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22122 }
22123 }
22124 };
22125
22126 // Ensure all the operands are the same value type, fill any missing
22127 // operands with UNDEF and create the BUILD_VECTOR.
22128 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22129 assert(Ops.size() == NumElts && "Unexpected vector size");
22130 for (SDValue &Op : Ops) {
22131 if (Op)
22132 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22133 else
22134 Op = DAG.getUNDEF(MaxEltVT);
22135 }
22136 return DAG.getBuildVector(VT, DL, Ops);
22137 };
22138
22139 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22140 Ops[Elt] = InVal;
22141
22142 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22143 for (SDValue CurVec = InVec; CurVec;) {
22144 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22145 if (CurVec.isUndef())
22146 return CanonicalizeBuildVector(Ops);
22147
22148 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22149 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22150 for (unsigned I = 0; I != NumElts; ++I)
22151 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22152 return CanonicalizeBuildVector(Ops);
22153 }
22154
22155 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22156 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22157 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22158 return CanonicalizeBuildVector(Ops);
22159 }
22160
22161 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22162 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22163 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22164 if (CurIdx->getAPIntValue().ult(NumElts)) {
22165 unsigned Idx = CurIdx->getZExtValue();
22166 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22167
22168 // Found entire BUILD_VECTOR.
22169 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22170 return CanonicalizeBuildVector(Ops);
22171
22172 CurVec = CurVec->getOperand(0);
22173 continue;
22174 }
22175
22176 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22177 // update the shuffle mask (and second operand if we started with unary
22178 // shuffle) and create a new legal shuffle.
22179 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22180 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22181 SDValue LHS = SVN->getOperand(0);
22182 SDValue RHS = SVN->getOperand(1);
22183 SmallVector<int, 16> Mask(SVN->getMask());
22184 bool Merged = true;
22185 for (auto I : enumerate(Ops)) {
22186 SDValue &Op = I.value();
22187 if (Op) {
22188 SmallVector<int, 16> NewMask;
22189 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22190 Merged = false;
22191 break;
22192 }
22193 Mask = std::move(NewMask);
22194 }
22195 }
22196 if (Merged)
22197 if (SDValue NewShuffle =
22198 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22199 return NewShuffle;
22200 }
22201
22202 // If all insertions are zero value, try to convert to AND mask.
22203 // TODO: Do this for -1 with OR mask?
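// e.g. inserting zeros into lanes 1 and 3 of X becomes (and X, {-1, 0, -1, 0}).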
22204 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22205 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22206 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22207 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22208 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22209 SmallVector<SDValue, 8> Mask(NumElts);
22210 for (unsigned I = 0; I != NumElts; ++I)
22211 Mask[I] = Ops[I] ? Zero : AllOnes;
22212 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22213 DAG.getBuildVector(VT, DL, Mask));
22214 }
22215
22216 // Failed to find a match in the chain - bail.
22217 break;
22218 }
22219
22220 // See if we can fill in the missing constant elements as zeros.
22221 // TODO: Should we do this for any constant?
22222 APInt DemandedZeroElts = APInt::getZero(NumElts);
22223 for (unsigned I = 0; I != NumElts; ++I)
22224 if (!Ops[I])
22225 DemandedZeroElts.setBit(I);
22226
22227 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22228 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22229 : DAG.getConstantFP(0, DL, MaxEltVT);
22230 for (unsigned I = 0; I != NumElts; ++I)
22231 if (!Ops[I])
22232 Ops[I] = Zero;
22233
22234 return CanonicalizeBuildVector(Ops);
22235 }
22236 }
22237
22238 return SDValue();
22239}
22240
22241SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22242 SDValue EltNo,
22243 LoadSDNode *OriginalLoad) {
22244 assert(OriginalLoad->isSimple());
22245
22246 EVT ResultVT = EVE->getValueType(0);
22247 EVT VecEltVT = InVecVT.getVectorElementType();
22248
22249 // If the vector element type is not a multiple of a byte then we are unable
22250 // to correctly compute an address to load only the extracted element as a
22251 // scalar.
22252 if (!VecEltVT.isByteSized())
22253 return SDValue();
22254
22255 ISD::LoadExtType ExtTy =
22256 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22257 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22258 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22259 return SDValue();
22260
22261 Align Alignment = OriginalLoad->getAlign();
22262 MachinePointerInfo MPI;
22263 SDLoc DL(EVE);
22264 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22265 int Elt = ConstEltNo->getZExtValue();
22266 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22267 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22268 Alignment = commonAlignment(Alignment, PtrOff);
22269 } else {
22270 // Discard the pointer info except the address space because the memory
22271 // operand can't represent this new access since the offset is variable.
22272 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22273 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22274 }
22275
22276 unsigned IsFast = 0;
22277 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22278 OriginalLoad->getAddressSpace(), Alignment,
22279 OriginalLoad->getMemOperand()->getFlags(),
22280 &IsFast) ||
22281 !IsFast)
22282 return SDValue();
22283
22284 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22285 InVecVT, EltNo);
22286
22287 // We are replacing a vector load with a scalar load. The new load must have
22288 // identical memory op ordering to the original.
22289 SDValue Load;
22290 if (ResultVT.bitsGT(VecEltVT)) {
22291 // If the result type of vextract is wider than the load, then issue an
22292 // extending load instead.
22293 ISD::LoadExtType ExtType =
22294 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22295 : ISD::EXTLOAD;
22296 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22297 NewPtr, MPI, VecEltVT, Alignment,
22298 OriginalLoad->getMemOperand()->getFlags(),
22299 OriginalLoad->getAAInfo());
22300 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22301 } else {
22302 // The result type is narrower or the same width as the vector element
22303 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22304 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22305 OriginalLoad->getAAInfo());
22306 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22307 if (ResultVT.bitsLT(VecEltVT))
22308 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22309 else
22310 Load = DAG.getBitcast(ResultVT, Load);
22311 }
22312 ++OpsNarrowed;
22313 return Load;
22314}
22315
22316/// Transform a vector binary operation into a scalar binary operation by moving
22317/// the math/logic after an extract element of a vector.
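/// For example: (extract_elt (add X, {1,2,3,4}), 2) --> (add (extract_elt X, 2), 3).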
22318 static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
22319 const SDLoc &DL, bool LegalOperations) {
22320 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22321 SDValue Vec = ExtElt->getOperand(0);
22322 SDValue Index = ExtElt->getOperand(1);
22323 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22324 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22325 Vec->getNumValues() != 1)
22326 return SDValue();
22327
22328 // Targets may want to avoid this to prevent an expensive register transfer.
22329 if (!TLI.shouldScalarizeBinop(Vec))
22330 return SDValue();
22331
22332 // Extracting an element of a vector constant is constant-folded, so this
22333 // transform is just replacing a vector op with a scalar op while moving the
22334 // extract.
22335 SDValue Op0 = Vec.getOperand(0);
22336 SDValue Op1 = Vec.getOperand(1);
22337 APInt SplatVal;
22338 if (isAnyConstantBuildVector(Op0, true) ||
22339 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22340 isAnyConstantBuildVector(Op1, true) ||
22341 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22342 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22343 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22344 EVT VT = ExtElt->getValueType(0);
22345 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22346 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22347 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22348 }
22349
22350 return SDValue();
22351}
22352
22353 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22354 // recursively analyse all of its users and try to model them as
22355 // bit sequence extractions. If all of them agree on the new, narrower element
22356// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22357// new element type, do so now.
22358// This is mainly useful to recover from legalization that scalarized
22359// the vector as wide elements, but tries to rebuild it with narrower elements.
22360//
22361// Some more nodes could be modelled if that helps cover interesting patterns.
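// Illustrative example (little-endian): if (extract_elt v2i64 V, 1) is only used as (trunc to i32) and as (trunc (srl by 32) to i32), both users can be rebuilt as extract_elt's of (bitcast V to v4i32) at indices 2 and 3.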
22362bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22363 SDNode *N) {
22364 // We perform this optimization post type-legalization because
22365 // the type-legalizer often scalarizes integer-promoted vectors.
22366 // Performing this optimization earlier may cause legalization cycles.
22367 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22368 return false;
22369
22370 // TODO: Add support for big-endian.
22371 if (DAG.getDataLayout().isBigEndian())
22372 return false;
22373
22374 SDValue VecOp = N->getOperand(0);
22375 EVT VecVT = VecOp.getValueType();
22376 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22377
22378 // We must start with a constant extraction index.
22379 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22380 if (!IndexC)
22381 return false;
22382
22383 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22384 "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22385
22386 // TODO: deal with the case of implicit anyext of the extraction.
22387 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22388 EVT ScalarVT = N->getValueType(0);
22389 if (VecVT.getScalarType() != ScalarVT)
22390 return false;
22391
22392 // TODO: deal with the cases other than everything being integer-typed.
22393 if (!ScalarVT.isScalarInteger())
22394 return false;
22395
22396 struct Entry {
22397 SDNode *Producer;
22398
22399 // Which bits of VecOp does it contain?
22400 unsigned BitPos;
22401 int NumBits;
22402 // NOTE: the actual width of \p Producer may be wider than NumBits!
22403
22404 Entry(Entry &&) = default;
22405 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22406 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22407
22408 Entry() = delete;
22409 Entry(const Entry &) = delete;
22410 Entry &operator=(const Entry &) = delete;
22411 Entry &operator=(Entry &&) = delete;
22412 };
22413 SmallVector<Entry, 32> Worklist;
22414 SmallVector<Entry, 32> Leafs;
22415
22416 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22417 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22418 /*NumBits=*/VecEltBitWidth);
22419
22420 while (!Worklist.empty()) {
22421 Entry E = Worklist.pop_back_val();
22422 // Does the node not even use any of the VecOp bits?
22423 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22424 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22425 return false; // Let's allow the other combines clean this up first.
22426 // Did we fail to model any of the users of the Producer?
22427 bool ProducerIsLeaf = false;
22428 // Look at each user of this Producer.
22429 for (SDNode *User : E.Producer->uses()) {
22430 switch (User->getOpcode()) {
22431 // TODO: support ISD::BITCAST
22432 // TODO: support ISD::ANY_EXTEND
22433 // TODO: support ISD::ZERO_EXTEND
22434 // TODO: support ISD::SIGN_EXTEND
22435 case ISD::TRUNCATE:
22436 // Truncation simply means we keep position, but extract less bits.
22437 Worklist.emplace_back(User, E.BitPos,
22438 /*NumBits=*/User->getValueSizeInBits(0));
22439 break;
22440 // TODO: support ISD::SRA
22441 // TODO: support ISD::SHL
22442 case ISD::SRL:
22443 // We should be shifting the Producer by a constant amount.
22444 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22445 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22446 // Logical right-shift means that we start extraction later,
22447 // but stop it at the same position we did previously.
22448 unsigned ShAmt = ShAmtC->getZExtValue();
22449 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22450 break;
22451 }
22452 [[fallthrough]];
22453 default:
22454 // We can not model this user of the Producer.
22455 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22456 ProducerIsLeaf = true;
22457 // Profitability check: all users that we can not model
22458 // must be ISD::BUILD_VECTOR's.
22459 if (User->getOpcode() != ISD::BUILD_VECTOR)
22460 return false;
22461 break;
22462 }
22463 }
22464 if (ProducerIsLeaf)
22465 Leafs.emplace_back(std::move(E));
22466 }
22467
22468 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22469
22470 // If we are still at the same element granularity, give up.
22471 if (NewVecEltBitWidth == VecEltBitWidth)
22472 return false;
22473
22474 // The vector width must be a multiple of the new element width.
22475 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22476 return false;
22477
22478 // All leafs must agree on the new element width.
22479 // All leafs must not expect any "padding" bits on top of that width.
22480 // All leafs must start extraction from multiple of that width.
22481 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22482 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22483 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22484 E.BitPos % NewVecEltBitWidth == 0;
22485 }))
22486 return false;
22487
22488 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22489 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22490 VecVT.getSizeInBits() / NewVecEltBitWidth);
22491
22492 if (LegalTypes &&
22493 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22494 return false;
22495
22496 if (LegalOperations &&
22497 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22498 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22499 return false;
22500
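// For example, if an i32 element extracted from a v4i32 vector is only used
// as (i16 (trunc X)) and (i16 (trunc (srl X, 16))), and those truncates in
// turn only feed BUILD_VECTORs, the truncates are the leaves; below they are
// rewritten as extracts of elements 2*Idx and 2*Idx+1 of (v8i16 (bitcast VecOp)).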
22501 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22502 for (const Entry &E : Leafs) {
22503 SDLoc DL(E.Producer);
22504 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22505 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22506 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22507 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22508 DAG.getVectorIdxConstant(NewIndex, DL));
22509 CombineTo(E.Producer, V);
22510 }
22511
22512 return true;
22513}
22514
22515SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22516 SDValue VecOp = N->getOperand(0);
22517 SDValue Index = N->getOperand(1);
22518 EVT ScalarVT = N->getValueType(0);
22519 EVT VecVT = VecOp.getValueType();
22520 if (VecOp.isUndef())
22521 return DAG.getUNDEF(ScalarVT);
22522
22523 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22524 //
22525 // This only really matters if the index is non-constant since other combines
22526 // on the constant elements already work.
22527 SDLoc DL(N);
22528 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22529 Index == VecOp.getOperand(2)) {
22530 SDValue Elt = VecOp.getOperand(1);
22531 AddUsersToWorklist(VecOp.getNode());
22532 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22533 }
22534
22535 // (vextract (scalar_to_vector val), 0) -> val
22536 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22537 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22538 if (DAG.isKnownNeverZero(Index))
22539 return DAG.getUNDEF(ScalarVT);
22540
22541 // Check if the result type doesn't match the inserted element type.
22542 // The inserted element and extracted element may have mismatched bitwidth.
22543 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22544 SDValue InOp = VecOp.getOperand(0);
22545 if (InOp.getValueType() != ScalarVT) {
22546 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22547 if (InOp.getValueType().bitsGT(ScalarVT))
22548 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22549 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22550 }
22551 return InOp;
22552 }
22553
22554 // extract_vector_elt of out-of-bounds element -> UNDEF
22555 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22556 if (IndexC && VecVT.isFixedLengthVector() &&
22557 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22558 return DAG.getUNDEF(ScalarVT);
22559
22560 // extract_vector_elt (build_vector x, y), 1 -> y
22561 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22562 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22563 TLI.isTypeLegal(VecVT)) {
22564 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22565 VecVT.isFixedLengthVector()) &&
22566 "BUILD_VECTOR used for scalable vectors");
22567 unsigned IndexVal =
22568 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22569 SDValue Elt = VecOp.getOperand(IndexVal);
22570 EVT InEltVT = Elt.getValueType();
22571
22572 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22573 isNullConstant(Elt)) {
22574 // Sometimes build_vector's scalar input types do not match result type.
22575 if (ScalarVT == InEltVT)
22576 return Elt;
22577
22578 // TODO: It may be useful to truncate if free if the build_vector
22579 // implicitly converts.
22580 }
22581 }
22582
22583 if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
22584 return BO;
22585
22586 if (VecVT.isScalableVector())
22587 return SDValue();
22588
22589 // All the code from this point onwards assumes fixed width vectors, but it's
22590 // possible that some of the combinations could be made to work for scalable
22591 // vectors too.
22592 unsigned NumElts = VecVT.getVectorNumElements();
22593 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22594
22595 // See if the extracted element is constant, in which case fold it if it's
22596 // a legal fp immediate.
22597 if (IndexC && ScalarVT.isFloatingPoint()) {
22598 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22599 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22600 if (KnownElt.isConstant()) {
22601 APFloat CstFP =
22602 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22603 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22604 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22605 }
22606 }
22607
22608 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22609 // there are regressions on multiple targets without it. We can end up with a
22610 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22611 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22612 VecOp.hasOneUse()) {
22613 // The vector index of the LSBs of the source depends on the endianness.
22614 bool IsLE = DAG.getDataLayout().isLittleEndian();
22615 unsigned ExtractIndex = IndexC->getZExtValue();
22616 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22617 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22618 SDValue BCSrc = VecOp.getOperand(0);
22619 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22620 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22621
22622 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22623 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22624 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22625 // trunc i64 X to i32
22626 SDValue X = BCSrc.getOperand(0);
22627 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22628 "Extract element and scalar to vector can't change element type "
22629 "from FP to integer.");
22630 unsigned XBitWidth = X.getValueSizeInBits();
22631 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22632
22633 // An extract element return value type can be wider than its vector
22634 // operand element type. In that case, the high bits are undefined, so
22635 // it's possible that we may need to extend rather than truncate.
22636 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22637 assert(XBitWidth % VecEltBitWidth == 0 &&
22638 "Scalar bitwidth must be a multiple of vector element bitwidth");
22639 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22640 }
22641 }
22642 }
22643
22644 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22645 // We only perform this optimization before the op legalization phase because
22646 // we may introduce new vector instructions which are not backed by TD
22647 // patterns. For example on AVX, extracting elements from a wide vector
22648 // without using extract_subvector. However, if we can find an underlying
22649 // scalar value, then we can always use that.
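// For example, (extract_vector_elt (vector_shuffle<2,u,u,u> t1, t2), 0)
// becomes (extract_vector_elt t1, 2).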
22650 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22651 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22652 // Find the new index to extract from.
22653 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22654
22655 // Extracting an undef index is undef.
22656 if (OrigElt == -1)
22657 return DAG.getUNDEF(ScalarVT);
22658
22659 // Select the right vector half to extract from.
22660 SDValue SVInVec;
22661 if (OrigElt < (int)NumElts) {
22662 SVInVec = VecOp.getOperand(0);
22663 } else {
22664 SVInVec = VecOp.getOperand(1);
22665 OrigElt -= NumElts;
22666 }
22667
22668 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22669 SDValue InOp = SVInVec.getOperand(OrigElt);
22670 if (InOp.getValueType() != ScalarVT) {
22671 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22672 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22673 }
22674
22675 return InOp;
22676 }
22677
22678 // FIXME: We should handle recursing on other vector shuffles and
22679 // scalar_to_vector here as well.
22680
22681 if (!LegalOperations ||
22682 // FIXME: Should really be just isOperationLegalOrCustom.
22683 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22684 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT)) {
22685 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22686 DAG.getVectorIdxConstant(OrigElt, DL));
22687 }
22688 }
22689
22690 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22691 // simplify it based on the (valid) extraction indices.
22692 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22693 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22694 Use->getOperand(0) == VecOp &&
22695 isa<ConstantSDNode>(Use->getOperand(1));
22696 })) {
22697 APInt DemandedElts = APInt::getZero(NumElts);
22698 for (SDNode *Use : VecOp->uses()) {
22699 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22700 if (CstElt->getAPIntValue().ult(NumElts))
22701 DemandedElts.setBit(CstElt->getZExtValue());
22702 }
22703 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22704 // We simplified the vector operand of this extract element. If this
22705 // extract is not dead, visit it again so it is folded properly.
22706 if (N->getOpcode() != ISD::DELETED_NODE)
22707 AddToWorklist(N);
22708 return SDValue(N, 0);
22709 }
22710 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22711 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22712 // We simplified the vector operand of this extract element. If this
22713 // extract is not dead, visit it again so it is folded properly.
22714 if (N->getOpcode() != ISD::DELETED_NODE)
22715 AddToWorklist(N);
22716 return SDValue(N, 0);
22717 }
22718 }
22719
22720 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22721 return SDValue(N, 0);
22722
22723 // Everything under here is trying to match an extract of a loaded value.
22724 // If the result of load has to be truncated, then it's not necessarily
22725 // profitable.
22726 bool BCNumEltsChanged = false;
22727 EVT ExtVT = VecVT.getVectorElementType();
22728 EVT LVT = ExtVT;
22729 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22730 return SDValue();
22731
22732 if (VecOp.getOpcode() == ISD::BITCAST) {
22733 // Don't duplicate a load with other uses.
22734 if (!VecOp.hasOneUse())
22735 return SDValue();
22736
22737 EVT BCVT = VecOp.getOperand(0).getValueType();
22738 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22739 return SDValue();
22740 if (NumElts != BCVT.getVectorNumElements())
22741 BCNumEltsChanged = true;
22742 VecOp = VecOp.getOperand(0);
22743 ExtVT = BCVT.getVectorElementType();
22744 }
22745
22746 // extract (vector load $addr), i --> load $addr + i * size
22747 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22748 ISD::isNormalLoad(VecOp.getNode()) &&
22749 !Index->hasPredecessor(VecOp.getNode())) {
22750 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22751 if (VecLoad && VecLoad->isSimple())
22752 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22753 }
22754
22755 // Perform only after legalization to ensure build_vector / vector_shuffle
22756 // optimizations have already been done.
22757 if (!LegalOperations || !IndexC)
22758 return SDValue();
22759
22760 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22761 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22762 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22763 int Elt = IndexC->getZExtValue();
22764 LoadSDNode *LN0 = nullptr;
22765 if (ISD::isNormalLoad(VecOp.getNode())) {
22766 LN0 = cast<LoadSDNode>(VecOp);
22767 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22768 VecOp.getOperand(0).getValueType() == ExtVT &&
22769 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22770 // Don't duplicate a load with other uses.
22771 if (!VecOp.hasOneUse())
22772 return SDValue();
22773
22774 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22775 }
22776 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22777 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22778 // =>
22779 // (load $addr+1*size)
22780
22781 // Don't duplicate a load with other uses.
22782 if (!VecOp.hasOneUse())
22783 return SDValue();
22784
22785 // If the bit convert changed the number of elements, it is unsafe
22786 // to examine the mask.
22787 if (BCNumEltsChanged)
22788 return SDValue();
22789
22790 // Select the input vector, guarding against out of range extract vector.
22791 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22792 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22793
22794 if (VecOp.getOpcode() == ISD::BITCAST) {
22795 // Don't duplicate a load with other uses.
22796 if (!VecOp.hasOneUse())
22797 return SDValue();
22798
22799 VecOp = VecOp.getOperand(0);
22800 }
22801 if (ISD::isNormalLoad(VecOp.getNode())) {
22802 LN0 = cast<LoadSDNode>(VecOp);
22803 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22804 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22805 }
22806 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22807 VecVT.getVectorElementType() == ScalarVT &&
22808 (!LegalTypes ||
22809 TLI.isTypeLegal(
22810 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22811 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22812 // -> extract_vector_elt a, 0
22813 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22814 // -> extract_vector_elt a, 1
22815 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22816 // -> extract_vector_elt b, 0
22817 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22818 // -> extract_vector_elt b, 1
22819 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22820 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22821 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22822 Index.getValueType());
22823
22824 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22825 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22826 ConcatVT.getVectorElementType(),
22827 ConcatOp, NewIdx);
22828 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22829 }
22830
22831 // Make sure we found a non-volatile load and the extractelement is
22832 // the only use.
22833 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22834 return SDValue();
22835
22836 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22837 if (Elt == -1)
22838 return DAG.getUNDEF(LVT);
22839
22840 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22841}
22842
22843// Simplify (build_vec (ext )) to (bitcast (build_vec ))
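// For example (little endian):
//   (v4i32 (build_vector (zext i16:a), (zext i16:b), (zext i16:c), (zext i16:d)))
//   -> (v4i32 (bitcast (v8i16 (build_vector a, 0, b, 0, c, 0, d, 0))))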
22844SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22845 // We perform this optimization post type-legalization because
22846 // the type-legalizer often scalarizes integer-promoted vectors.
22847 // Performing this optimization before may create bit-casts which
22848 // will be type-legalized to complex code sequences.
22849 // We perform this optimization only before the operation legalizer because we
22850 // may introduce illegal operations.
22851 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22852 return SDValue();
22853
22854 unsigned NumInScalars = N->getNumOperands();
22855 SDLoc DL(N);
22856 EVT VT = N->getValueType(0);
22857
22858 // Check to see if this is a BUILD_VECTOR of a bunch of values
22859 // which come from any_extend or zero_extend nodes. If so, we can create
22860 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22861 // optimizations. We do not handle sign-extend because we can't fill the sign
22862 // using shuffles.
22863 EVT SourceType = MVT::Other;
22864 bool AllAnyExt = true;
22865
22866 for (unsigned i = 0; i != NumInScalars; ++i) {
22867 SDValue In = N->getOperand(i);
22868 // Ignore undef inputs.
22869 if (In.isUndef()) continue;
22870
22871 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22872 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22873
22874 // Abort if the element is not an extension.
22875 if (!ZeroExt && !AnyExt) {
22876 SourceType = MVT::Other;
22877 break;
22878 }
22879
22880 // The input is a ZeroExt or AnyExt. Check the original type.
22881 EVT InTy = In.getOperand(0).getValueType();
22882
22883 // Check that all of the widened source types are the same.
22884 if (SourceType == MVT::Other)
22885 // First time.
22886 SourceType = InTy;
22887 else if (InTy != SourceType) {
22888 // Multiple incoming types. Abort.
22889 SourceType = MVT::Other;
22890 break;
22891 }
22892
22893 // Check if all of the extends are ANY_EXTENDs.
22894 AllAnyExt &= AnyExt;
22895 }
22896
22897 // In order to have valid types, all of the inputs must be extended from the
22898 // same source type and all of the inputs must be any or zero extend.
22899 // Scalar sizes must be a power of two.
22900 EVT OutScalarTy = VT.getScalarType();
22901 bool ValidTypes =
22902 SourceType != MVT::Other &&
22903 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22904 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22905
22906 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22907 // turn into a single shuffle instruction.
22908 if (!ValidTypes)
22909 return SDValue();
22910
22911 // If we already have a splat buildvector, then don't fold it if it means
22912 // introducing zeros.
22913 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22914 return SDValue();
22915
22916 bool isLE = DAG.getDataLayout().isLittleEndian();
22917 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22918 assert(ElemRatio > 1 && "Invalid element size ratio");
22919 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22920 DAG.getConstant(0, DL, SourceType);
22921
22922 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22923 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22924
22925 // Populate the new build_vector
22926 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22927 SDValue Cast = N->getOperand(i);
22928 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22929 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22930 Cast.isUndef()) && "Invalid cast opcode");
22931 SDValue In;
22932 if (Cast.isUndef())
22933 In = DAG.getUNDEF(SourceType);
22934 else
22935 In = Cast->getOperand(0);
22936 unsigned Index = isLE ? (i * ElemRatio) :
22937 (i * ElemRatio + (ElemRatio - 1));
22938
22939 assert(Index < Ops.size() && "Invalid index");
22940 Ops[Index] = In;
22941 }
22942
22943 // The type of the new BUILD_VECTOR node.
22944 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22945 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22946 "Invalid vector size");
22947 // Check if the new vector type is legal.
22948 if (!isTypeLegal(VecVT) ||
22949 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22950 TLI.isOperationCustom(ISD::BUILD_VECTOR, VecVT)))
22951 return SDValue();
22952
22953 // Make the new BUILD_VECTOR.
22954 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22955
22956 // The new BUILD_VECTOR node has the potential to be further optimized.
22957 AddToWorklist(BV.getNode());
22958 // Bitcast to the desired type.
22959 return DAG.getBitcast(VT, BV);
22960}
22961
22962// Simplify (build_vec (trunc $1)
22963// (trunc (srl $1 half-width))
22964// (trunc (srl $1 (2 * half-width))))
22965// to (bitcast $1)
22966SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22967 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22968
22969 EVT VT = N->getValueType(0);
22970
22971 // Don't run this before LegalizeTypes if VT is legal.
22972 // Targets may have other preferences.
22973 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22974 return SDValue();
22975
22976 // Only for little endian
22977 if (!DAG.getDataLayout().isLittleEndian())
22978 return SDValue();
22979
22980 SDLoc DL(N);
22981 EVT OutScalarTy = VT.getScalarType();
22982 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22983
22984 // Only for power of two types to be sure that bitcast works well
22985 if (!isPowerOf2_64(ScalarTypeBitsize))
22986 return SDValue();
22987
22988 unsigned NumInScalars = N->getNumOperands();
22989
22990 // Look through bitcasts
22991 auto PeekThroughBitcast = [](SDValue Op) {
22992 if (Op.getOpcode() == ISD::BITCAST)
22993 return Op.getOperand(0);
22994 return Op;
22995 };
22996
22997 // The source value where all the parts are extracted.
22998 SDValue Src;
22999 for (unsigned i = 0; i != NumInScalars; ++i) {
23000 SDValue In = PeekThroughBitcast(N->getOperand(i));
23001 // Ignore undef inputs.
23002 if (In.isUndef()) continue;
23003
23004 if (In.getOpcode() != ISD::TRUNCATE)
23005 return SDValue();
23006
23007 In = PeekThroughBitcast(In.getOperand(0));
23008
23009 if (In.getOpcode() != ISD::SRL) {
23010 // For now only build_vec without shuffling, handle shifts here in the
23011 // future.
23012 if (i != 0)
23013 return SDValue();
23014
23015 Src = In;
23016 } else {
23017 // In is SRL
23018 SDValue part = PeekThroughBitcast(In.getOperand(0));
23019
23020 if (!Src) {
23021 Src = part;
23022 } else if (Src != part) {
23023 // Vector parts do not stem from the same variable
23024 return SDValue();
23025 }
23026
23027 SDValue ShiftAmtVal = In.getOperand(1);
23028 if (!isa<ConstantSDNode>(ShiftAmtVal))
23029 return SDValue();
23030
23031 uint64_t ShiftAmt = In.getConstantOperandVal(1);
23032
23033 // The extracted value is not extracted at the right position
23034 if (ShiftAmt != i * ScalarTypeBitsize)
23035 return SDValue();
23036 }
23037 }
23038
23039 // Only cast if the size is the same
23040 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
23041 return SDValue();
23042
23043 return DAG.getBitcast(VT, Src);
23044}
23045
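// Attempt to build a shuffle that produces the elements of build_vector N
// which come from the input vectors numbered LeftIdx and LeftIdx+1 in
// VectorMask, concatenating, splitting or padding the inputs as needed so
// the shuffle operands match the result type.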
23046SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
23047 ArrayRef<int> VectorMask,
23048 SDValue VecIn1, SDValue VecIn2,
23049 unsigned LeftIdx, bool DidSplitVec) {
23050 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
23051
23052 EVT VT = N->getValueType(0);
23053 EVT InVT1 = VecIn1.getValueType();
23054 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
23055
23056 unsigned NumElems = VT.getVectorNumElements();
23057 unsigned ShuffleNumElems = NumElems;
23058
23059 // If we artificially split a vector in two already, then the offsets in the
23060 // operands will all be based off of VecIn1, even those in VecIn2.
23061 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
23062
23063 uint64_t VTSize = VT.getFixedSizeInBits();
23064 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
23065 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
23066
23067 assert(InVT2Size <= InVT1Size &&
23068 "Inputs must be sorted to be in non-increasing vector size order.");
23069
23070 // We can't generate a shuffle node with mismatched input and output types.
23071 // Try to make the types match the type of the output.
23072 if (InVT1 != VT || InVT2 != VT) {
23073 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
23074 // If the output vector length is a multiple of both input lengths,
23075 // we can concatenate them and pad the rest with undefs.
23076 unsigned NumConcats = VTSize / InVT1Size;
23077 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
23078 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
23079 ConcatOps[0] = VecIn1;
23080 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
23081 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23082 VecIn2 = SDValue();
23083 } else if (InVT1Size == VTSize * 2) {
23084 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
23085 return SDValue();
23086
23087 if (!VecIn2.getNode()) {
23088 // If we only have one input vector, and it's twice the size of the
23089 // output, split it in two.
23090 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23091 DAG.getVectorIdxConstant(NumElems, DL));
23092 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23093 // Since we now have shorter input vectors, adjust the offset of the
23094 // second vector's start.
23095 Vec2Offset = NumElems;
23096 } else {
23097 assert(InVT2Size <= InVT1Size &&
23098 "Second input is not going to be larger than the first one.");
23099
23100 // VecIn1 is wider than the output, and we have another, possibly
23101 // smaller input. Pad the smaller input with undefs, shuffle at the
23102 // input vector width, and extract the output.
23103 // The shuffle type is different than VT, so check legality again.
23104 if (LegalOperations &&
23105 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23106 return SDValue();
23107
23108 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23109 // lower it back into a BUILD_VECTOR. So if the inserted type is
23110 // illegal, don't even try.
23111 if (InVT1 != InVT2) {
23112 if (!TLI.isTypeLegal(InVT2))
23113 return SDValue();
23114 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23115 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23116 }
23117 ShuffleNumElems = NumElems * 2;
23118 }
23119 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23120 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23121 ConcatOps[0] = VecIn2;
23122 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23123 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23124 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23125 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23126 return SDValue();
23127 // If the dest vector has two or fewer elements, then using a shuffle and an
23128 // extract from larger regs will cost even more.
23129 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23130 return SDValue();
23131 assert(InVT2Size <= InVT1Size &&
23132 "Second input is not going to be larger than the first one.");
23133
23134 // VecIn1 is wider than the output, and we have another, possibly
23135 // smaller input. Pad the smaller input with undefs, shuffle at the
23136 // input vector width, and extract the output.
23137 // The shuffle type is different than VT, so check legality again.
23138 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23139 return SDValue();
23140
23141 if (InVT1 != InVT2) {
23142 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23143 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23144 }
23145 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23146 } else {
23147 // TODO: Support cases where the length mismatch isn't exactly by a
23148 // factor of 2.
23149 // TODO: Move this check upwards, so that if we have bad type
23150 // mismatches, we don't create any DAG nodes.
23151 return SDValue();
23152 }
23153 }
23154
23155 // Initialize mask to undef.
23156 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23157
23158 // Only need to run up to the number of elements actually used, not the
23159 // total number of elements in the shuffle - if we are shuffling a wider
23160 // vector, the high lanes should be set to undef.
23161 for (unsigned i = 0; i != NumElems; ++i) {
23162 if (VectorMask[i] <= 0)
23163 continue;
23164
23165 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23166 if (VectorMask[i] == (int)LeftIdx) {
23167 Mask[i] = ExtIndex;
23168 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23169 Mask[i] = Vec2Offset + ExtIndex;
23170 }
23171 }
23172
23173 // The type the input vectors may have changed above.
23174 InVT1 = VecIn1.getValueType();
23175
23176 // If we already have a VecIn2, it should have the same type as VecIn1.
23177 // If we don't, get an undef/zero vector of the appropriate type.
23178 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23179 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23180
23181 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23182 if (ShuffleNumElems > NumElems)
23183 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23184
23185 return Shuffle;
23186}
23187
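// Match a build_vector whose operands are all undef except for a single
// zero-extended extract_vector_elt, and rewrite it as a shuffle of the
// extracted vector with a zero vector, bitcast to the build_vector type.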
23188 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23189 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23190
23191 // First, determine where the build vector is not undef.
23192 // TODO: We could extend this to handle zero elements as well as undefs.
23193 int NumBVOps = BV->getNumOperands();
23194 int ZextElt = -1;
23195 for (int i = 0; i != NumBVOps; ++i) {
23196 SDValue Op = BV->getOperand(i);
23197 if (Op.isUndef())
23198 continue;
23199 if (ZextElt == -1)
23200 ZextElt = i;
23201 else
23202 return SDValue();
23203 }
23204 // Bail out if there's no non-undef element.
23205 if (ZextElt == -1)
23206 return SDValue();
23207
23208 // The build vector contains some number of undef elements and exactly
23209 // one other element. That other element must be a zero-extended scalar
23210 // extracted from a vector at a constant index to turn this into a shuffle.
23211 // Also, require that the build vector does not implicitly truncate/extend
23212 // its elements.
23213 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23214 EVT VT = BV->getValueType(0);
23215 SDValue Zext = BV->getOperand(ZextElt);
23216 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23217 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23218 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23219 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23220 return SDValue();
23221
23222 // The zero-extend must be a multiple of the source size, and we must be
23223 // building a vector of the same size as the source of the extract element.
23224 SDValue Extract = Zext.getOperand(0);
23225 unsigned DestSize = Zext.getValueSizeInBits();
23226 unsigned SrcSize = Extract.getValueSizeInBits();
23227 if (DestSize % SrcSize != 0 ||
23228 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23229 return SDValue();
23230
23231 // Create a shuffle mask that will combine the extracted element with zeros
23232 // and undefs.
23233 int ZextRatio = DestSize / SrcSize;
23234 int NumMaskElts = NumBVOps * ZextRatio;
23235 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23236 for (int i = 0; i != NumMaskElts; ++i) {
23237 if (i / ZextRatio == ZextElt) {
23238 // The low bits of the (potentially translated) extracted element map to
23239 // the source vector. The high bits map to zero. We will use a zero vector
23240 // as the 2nd source operand of the shuffle, so use the 1st element of
23241 // that vector (mask value is number-of-elements) for the high bits.
23242 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23243 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23244 : NumMaskElts;
23245 }
23246
23247 // Undef elements of the build vector remain undef because we initialize
23248 // the shuffle mask with -1.
23249 }
23250
23251 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23252 // bitcast (shuffle V, ZeroVec, VectorMask)
23253 SDLoc DL(BV);
23254 EVT VecVT = Extract.getOperand(0).getValueType();
23255 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23256 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23257 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23258 ZeroVec, ShufMask, DAG);
23259 if (!Shuf)
23260 return SDValue();
23261 return DAG.getBitcast(VT, Shuf);
23262}
23263
23264// FIXME: promote to STLExtras.
23265template <typename R, typename T>
23266static auto getFirstIndexOf(R &&Range, const T &Val) {
23267 auto I = find(Range, Val);
23268 if (I == Range.end())
23269 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23270 return std::distance(Range.begin(), I);
23271}
23272
23273// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23274// operations. If the types of the vectors we're extracting from allow it,
23275// turn this into a vector_shuffle node.
23276SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23277 SDLoc DL(N);
23278 EVT VT = N->getValueType(0);
23279
23280 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23281 if (!isTypeLegal(VT))
23282 return SDValue();
23283
23284 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23285 return V;
23286
23287 // May only combine to shuffle after legalize if shuffle is legal.
23288 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23289 return SDValue();
23290
23291 bool UsesZeroVector = false;
23292 unsigned NumElems = N->getNumOperands();
23293
23294 // Record, for each element of the newly built vector, which input vector
23295 // that element comes from. -1 stands for undef, 0 for the zero vector,
23296 // and positive values for the input vectors.
23297 // VectorMask maps each element to its vector number, and VecIn maps vector
23298 // numbers to their initial SDValues.
23299
23300 SmallVector<int, 8> VectorMask(NumElems, -1);
23301 SmallVector<SDValue, 8> VecIn;
23302 VecIn.push_back(SDValue());
23303
23304 for (unsigned i = 0; i != NumElems; ++i) {
23305 SDValue Op = N->getOperand(i);
23306
23307 if (Op.isUndef())
23308 continue;
23309
23310 // See if we can use a blend with a zero vector.
23311 // TODO: Should we generalize this to a blend with an arbitrary constant
23312 // vector?
23313 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23314 UsesZeroVector = true;
23315 VectorMask[i] = 0;
23316 continue;
23317 }
23318
23319 // Not an undef or zero. If the input is something other than an
23320 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23321 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23322 !isa<ConstantSDNode>(Op.getOperand(1)))
23323 return SDValue();
23324 SDValue ExtractedFromVec = Op.getOperand(0);
23325
23326 if (ExtractedFromVec.getValueType().isScalableVector())
23327 return SDValue();
23328
23329 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23330 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23331 return SDValue();
23332
23333 // All inputs must have the same element type as the output.
23334 if (VT.getVectorElementType() !=
23335 ExtractedFromVec.getValueType().getVectorElementType())
23336 return SDValue();
23337
23338 // Have we seen this input vector before?
23339 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23340 // a map back from SDValues to numbers isn't worth it.
23341 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23342 if (Idx == -1) { // A new source vector?
23343 Idx = VecIn.size();
23344 VecIn.push_back(ExtractedFromVec);
23345 }
23346
23347 VectorMask[i] = Idx;
23348 }
23349
23350 // If we didn't find at least one input vector, bail out.
23351 if (VecIn.size() < 2)
23352 return SDValue();
23353
23354 // If all the operands of BUILD_VECTOR extract from the same
23355 // vector, then split the vector efficiently based on the maximum
23356 // vector access index and adjust the VectorMask and
23357 // VecIn accordingly.
23358 bool DidSplitVec = false;
23359 if (VecIn.size() == 2) {
23360 unsigned MaxIndex = 0;
23361 unsigned NearestPow2 = 0;
23362 SDValue Vec = VecIn.back();
23363 EVT InVT = Vec.getValueType();
23364 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23365
23366 for (unsigned i = 0; i < NumElems; i++) {
23367 if (VectorMask[i] <= 0)
23368 continue;
23369 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23370 IndexVec[i] = Index;
23371 MaxIndex = std::max(MaxIndex, Index);
23372 }
23373
23374 NearestPow2 = PowerOf2Ceil(MaxIndex);
23375 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23376 NumElems * 2 < NearestPow2) {
23377 unsigned SplitSize = NearestPow2 / 2;
23378 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23379 InVT.getVectorElementType(), SplitSize);
23380 if (TLI.isTypeLegal(SplitVT) &&
23381 SplitSize + SplitVT.getVectorNumElements() <=
23382 InVT.getVectorNumElements()) {
23383 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23384 DAG.getVectorIdxConstant(SplitSize, DL));
23385 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23386 DAG.getVectorIdxConstant(0, DL));
23387 VecIn.pop_back();
23388 VecIn.push_back(VecIn1);
23389 VecIn.push_back(VecIn2);
23390 DidSplitVec = true;
23391
23392 for (unsigned i = 0; i < NumElems; i++) {
23393 if (VectorMask[i] <= 0)
23394 continue;
23395 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23396 }
23397 }
23398 }
23399 }
23400
23401 // Sort input vectors by decreasing vector element count,
23402 // while preserving the relative order of equally-sized vectors.
23403 // Note that we keep the first "implicit" zero vector as-is.
23404 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23405 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23406 [](const SDValue &a, const SDValue &b) {
23407 return a.getValueType().getVectorNumElements() >
23408 b.getValueType().getVectorNumElements();
23409 });
23410
23411 // We now also need to rebuild the VectorMask, because it referenced element
23412 // order in VecIn, and we just sorted them.
23413 for (int &SourceVectorIndex : VectorMask) {
23414 if (SourceVectorIndex <= 0)
23415 continue;
23416 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23417 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23418 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23419 SourceVectorIndex = Idx;
23420 }
23421
23422 VecIn = std::move(SortedVecIn);
23423
23424 // TODO: Should this fire if some of the input vectors has illegal type (like
23425 // it does now), or should we let legalization run its course first?
23426
23427 // Shuffle phase:
23428 // Take pairs of vectors, and shuffle them so that the result has elements
23429 // from these vectors in the correct places.
23430 // For example, given:
23431 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23432 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23433 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23434 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23435 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23436 // We will generate:
23437 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23438 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23439 SmallVector<SDValue, 4> Shuffles;
23440 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23441 unsigned LeftIdx = 2 * In + 1;
23442 SDValue VecLeft = VecIn[LeftIdx];
23443 SDValue VecRight =
23444 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23445
23446 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23447 VecRight, LeftIdx, DidSplitVec))
23448 Shuffles.push_back(Shuffle);
23449 else
23450 return SDValue();
23451 }
23452
23453 // If we need the zero vector as an "ingredient" in the blend tree, add it
23454 // to the list of shuffles.
23455 if (UsesZeroVector)
23456 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23457 : DAG.getConstantFP(0.0, DL, VT));
23458
23459 // If we only have one shuffle, we're done.
23460 if (Shuffles.size() == 1)
23461 return Shuffles[0];
23462
23463 // Update the vector mask to point to the post-shuffle vectors.
23464 for (int &Vec : VectorMask)
23465 if (Vec == 0)
23466 Vec = Shuffles.size() - 1;
23467 else
23468 Vec = (Vec - 1) / 2;
23469
23470 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23471 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23472 // generate:
23473 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23474 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23475 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23476 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23477 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23478 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23479 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23480
23481 // Make sure the initial size of the shuffle list is even.
23482 if (Shuffles.size() % 2)
23483 Shuffles.push_back(DAG.getUNDEF(VT));
23484
23485 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23486 if (CurSize % 2) {
23487 Shuffles[CurSize] = DAG.getUNDEF(VT);
23488 CurSize++;
23489 }
23490 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23491 int Left = 2 * In;
23492 int Right = 2 * In + 1;
23493 SmallVector<int, 8> Mask(NumElems, -1);
23494 SDValue L = Shuffles[Left];
23495 ArrayRef<int> LMask;
23496 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23497 L.use_empty() && L.getOperand(1).isUndef() &&
23498 L.getOperand(0).getValueType() == L.getValueType();
23499 if (IsLeftShuffle) {
23500 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23501 L = L.getOperand(0);
23502 }
23503 SDValue R = Shuffles[Right];
23504 ArrayRef<int> RMask;
23505 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23506 R.use_empty() && R.getOperand(1).isUndef() &&
23507 R.getOperand(0).getValueType() == R.getValueType();
23508 if (IsRightShuffle) {
23509 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23510 R = R.getOperand(0);
23511 }
23512 for (unsigned I = 0; I != NumElems; ++I) {
23513 if (VectorMask[I] == Left) {
23514 Mask[I] = I;
23515 if (IsLeftShuffle)
23516 Mask[I] = LMask[I];
23517 VectorMask[I] = In;
23518 } else if (VectorMask[I] == Right) {
23519 Mask[I] = I + NumElems;
23520 if (IsRightShuffle)
23521 Mask[I] = RMask[I] + NumElems;
23522 VectorMask[I] = In;
23523 }
23524 }
23525
23526 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23527 }
23528 }
23529 return Shuffles[0];
23530}
23531
23532 // Try to turn a build vector of zero extends of extract vector elts into a
23533 // vector zero extend and possibly an extract subvector.
23534// TODO: Support sign extend?
23535// TODO: Allow undef elements?
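// For example:
//   (v2i64 (build_vector (zext (extract_vector_elt v4i32:X, 2)),
//                        (zext (extract_vector_elt v4i32:X, 3))))
//   -> (v2i64 (zero_extend (v2i32 (extract_subvector X, 2))))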
23536SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23537 if (LegalOperations)
23538 return SDValue();
23539
23540 EVT VT = N->getValueType(0);
23541
23542 bool FoundZeroExtend = false;
23543 SDValue Op0 = N->getOperand(0);
23544 auto checkElem = [&](SDValue Op) -> int64_t {
23545 unsigned Opc = Op.getOpcode();
23546 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23547 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23548 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23549 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23550 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23551 return C->getZExtValue();
23552 return -1;
23553 };
23554
23555 // Make sure the first element matches
23556 // (zext (extract_vector_elt X, C))
23557 // Offset must be a constant multiple of the
23558 // known-minimum vector length of the result type.
23559 int64_t Offset = checkElem(Op0);
23560 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23561 return SDValue();
23562
23563 unsigned NumElems = N->getNumOperands();
23564 SDValue In = Op0.getOperand(0).getOperand(0);
23565 EVT InSVT = In.getValueType().getScalarType();
23566 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23567
23568 // Don't create an illegal input type after type legalization.
23569 if (LegalTypes && !TLI.isTypeLegal(InVT))
23570 return SDValue();
23571
23572 // Ensure all the elements come from the same vector and are adjacent.
23573 for (unsigned i = 1; i != NumElems; ++i) {
23574 if ((Offset + i) != checkElem(N->getOperand(i)))
23575 return SDValue();
23576 }
23577
23578 SDLoc DL(N);
23579 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23580 Op0.getOperand(0).getOperand(1));
23581 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23582 VT, In);
23583}
23584
23585 // If this is a very simple BUILD_VECTOR with the first element being a ZERO_EXTEND,
23586 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
23587 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23588 // This pattern can appear during legalization.
23589//
23590 // NOTE: This can be generalized to allow more than a single
23591 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
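// For example (little endian, assuming v8i16 is legal):
//   (v2i64 (build_vector (i64 (zero_extend i16:x)), (i64 0)))
//   -> (v2i64 (bitcast (v8i16 (build_vector (trunc x), 0, 0, 0, 0, 0, 0, 0))))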
23592SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23593 // Don't run this after legalization. Targets may have other preferences.
23594 if (Level >= AfterLegalizeDAG)
23595 return SDValue();
23596
23597 // FIXME: support big-endian.
23598 if (DAG.getDataLayout().isBigEndian())
23599 return SDValue();
23600
23601 EVT VT = N->getValueType(0);
23602 EVT OpVT = N->getOperand(0).getValueType();
23603 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23604
23605 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23606
23607 if (!TLI.isTypeLegal(OpIntVT) ||
23608 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23609 return SDValue();
23610
23611 unsigned EltBitwidth = VT.getScalarSizeInBits();
23612 // NOTE: the actual width of operands may be wider than that!
23613
23614 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23615 // active bits they all have? We'll want to truncate them all to that width.
23616 unsigned ActiveBits = 0;
23617 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23618 for (auto I : enumerate(N->ops())) {
23619 SDValue Op = I.value();
23620 // FIXME: support UNDEF elements?
23621 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23622 unsigned OpActiveBits =
23623 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23624 if (OpActiveBits == 0) {
23625 KnownZeroOps.setBit(I.index());
23626 continue;
23627 }
23628 // Profitability check: don't allow non-zero constant operands.
23629 return SDValue();
23630 }
23631 // Profitability check: there must only be a single non-zero operand,
23632 // and it must be the first operand of the BUILD_VECTOR.
23633 if (I.index() != 0)
23634 return SDValue();
23635 // The operand must be a zero-extension itself.
23636 // FIXME: this could be generalized to known leading zeros check.
23637 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23638 return SDValue();
23639 unsigned CurrActiveBits =
23640 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23641 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23642 ActiveBits = CurrActiveBits;
23643 // We want to at least halve the element size.
23644 if (2 * ActiveBits > EltBitwidth)
23645 return SDValue();
23646 }
23647
23648 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23649 if (ActiveBits == 0)
23650 return SDValue();
23651
23652 // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits,
23653 // into how many chunks can we split our element width?
23654 EVT NewScalarIntVT, NewIntVT;
23655 std::optional<unsigned> Factor;
23656 // We can split the element into at least two chunks, but not into more
23657 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23658 // such that the element width is a multiple of it
23659 // and the resulting types/operations on that chunk width are legal.
23660 assert(2 * ActiveBits <= EltBitwidth &&
23661 "We know that half or less bits of the element are active.");
23662 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23663 if (EltBitwidth % Scale != 0)
23664 continue;
23665 unsigned ChunkBitwidth = EltBitwidth / Scale;
23666 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23667 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23668 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23669 Scale * N->getNumOperands());
23670 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23671 (LegalOperations &&
23672 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23673 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23674 continue;
23675 Factor = Scale;
23676 break;
23677 }
23678 if (!Factor)
23679 return SDValue();
23680
23681 SDLoc DL(N);
23682 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23683
23684 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23685 SmallVector<SDValue, 16> NewOps;
23686 NewOps.reserve(NewIntVT.getVectorNumElements());
23687 for (auto I : enumerate(N->ops())) {
23688 SDValue Op = I.value();
23689 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23690 unsigned SrcOpIdx = I.index();
23691 if (KnownZeroOps[SrcOpIdx]) {
23692 NewOps.append(*Factor, ZeroOp);
23693 continue;
23694 }
23695 Op = DAG.getBitcast(OpIntVT, Op);
23696 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23697 NewOps.emplace_back(Op);
23698 NewOps.append(*Factor - 1, ZeroOp);
23699 }
23700 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23701 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23702 NewBV = DAG.getBitcast(VT, NewBV);
23703 return NewBV;
23704}
23705
23706SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23707 EVT VT = N->getValueType(0);
23708
23709 // A vector built entirely of undefs is undef.
23710 if (ISD::allOperandsUndef(N))
23711 return DAG.getUNDEF(VT);
23712
23713 // If this is a splat of a bitcast from another vector, change to a
23714 // concat_vector.
23715 // For example:
23716 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23717 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23718 //
23719 // If X is a build_vector itself, the concat can become a larger build_vector.
23720 // TODO: Maybe this is useful for non-splat too?
23721 if (!LegalOperations) {
23722 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23723 // Only change build_vector to a concat_vector if the splat value type is
23724 // same as the vector element type.
23725 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23726 Splat = peekThroughBitcasts(Splat);
23727 EVT SrcVT = Splat.getValueType();
23728 if (SrcVT.isVector()) {
23729 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23730 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23731 SrcVT.getVectorElementType(), NumElts);
23732 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23733 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23734 SDValue Concat =
23735 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23736 return DAG.getBitcast(VT, Concat);
23737 }
23738 }
23739 }
23740 }
23741
23742 // Check if we can express BUILD VECTOR via subvector extract.
23743 if (!LegalTypes && (N->getNumOperands() > 1)) {
23744 SDValue Op0 = N->getOperand(0);
23745 auto checkElem = [&](SDValue Op) -> uint64_t {
23746 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23747 (Op0.getOperand(0) == Op.getOperand(0)))
23748 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23749 return CNode->getZExtValue();
23750 return -1;
23751 };
23752
23753 int Offset = checkElem(Op0);
23754 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23755 if (Offset + i != checkElem(N->getOperand(i))) {
23756 Offset = -1;
23757 break;
23758 }
23759 }
23760
23761 if ((Offset == 0) &&
23762 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23763 return Op0.getOperand(0);
23764 if ((Offset != -1) &&
23765 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23766 0)) // IDX must be multiple of output size.
23767 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23768 Op0.getOperand(0), Op0.getOperand(1));
23769 }
23770
23771 if (SDValue V = convertBuildVecZextToZext(N))
23772 return V;
23773
23774 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23775 return V;
23776
23777 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23778 return V;
23779
23780 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23781 return V;
23782
23783 if (SDValue V = reduceBuildVecToShuffle(N))
23784 return V;
23785
23786 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23787 // Do this late as some of the above may replace the splat.
23788 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23789 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23790 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23791 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23792 }
23793
23794 return SDValue();
23795}
23796
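// Combine a CONCAT_VECTORS whose operands are bitcasts of scalars (or UNDEFs)
// into one BUILD_VECTOR of those scalars, bitcast to the concatenated type.
// Integer and floating point operands are first unified to a common scalar type.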
23797 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23798 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23799 EVT OpVT = N->getOperand(0).getValueType();
23800
23801 // If the operands are legal vectors, leave them alone.
23802 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23803 return SDValue();
23804
23805 SDLoc DL(N);
23806 EVT VT = N->getValueType(0);
23807 SmallVector<SDValue, 8> Ops;
23808 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23809
23810 // Keep track of what we encounter.
23811 bool AnyInteger = false;
23812 bool AnyFP = false;
23813 for (const SDValue &Op : N->ops()) {
23814 if (ISD::BITCAST == Op.getOpcode() &&
23815 !Op.getOperand(0).getValueType().isVector())
23816 Ops.push_back(Op.getOperand(0));
23817 else if (ISD::UNDEF == Op.getOpcode())
23818 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23819 else
23820 return SDValue();
23821
23822 // Note whether we encounter an integer or floating point scalar.
23823 // If it's neither, bail out, it could be something weird like x86mmx.
23824 EVT LastOpVT = Ops.back().getValueType();
23825 if (LastOpVT.isFloatingPoint())
23826 AnyFP = true;
23827 else if (LastOpVT.isInteger())
23828 AnyInteger = true;
23829 else
23830 return SDValue();
23831 }
23832
23833 // If any of the operands is a floating point scalar bitcast to a vector,
23834 // use floating point types throughout, and bitcast everything.
23835 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23836 if (AnyFP) {
23837 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23838 if (AnyInteger) {
23839 for (SDValue &Op : Ops) {
23840 if (Op.getValueType() == SVT)
23841 continue;
23842 if (Op.isUndef())
23843 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23844 else
23845 Op = DAG.getBitcast(SVT, Op);
23846 }
23847 }
23848 }
23849
23850 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23851 VT.getSizeInBits() / SVT.getSizeInBits());
23852 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23853}
23854
23855// Attempt to merge nested concat_vectors/undefs.
23856// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23857// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23858 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23859 SelectionDAG &DAG) {
23860 EVT VT = N->getValueType(0);
23861
23862 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23863 EVT SubVT;
23864 SDValue FirstConcat;
23865 for (const SDValue &Op : N->ops()) {
23866 if (Op.isUndef())
23867 continue;
23868 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23869 return SDValue();
23870 if (!FirstConcat) {
23871 SubVT = Op.getOperand(0).getValueType();
23872 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23873 return SDValue();
23874 FirstConcat = Op;
23875 continue;
23876 }
23877 if (SubVT != Op.getOperand(0).getValueType())
23878 return SDValue();
23879 }
23880 assert(FirstConcat && "Concat of all-undefs found");
23881
23882 SmallVector<SDValue> ConcatOps;
23883 for (const SDValue &Op : N->ops()) {
23884 if (Op.isUndef()) {
23885 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23886 continue;
23887 }
23888 ConcatOps.append(Op->op_begin(), Op->op_end());
23889 }
23890 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23891}
23892
23893// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23894// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23895// most two distinct vectors the same size as the result, attempt to turn this
23896// into a legal shuffle.
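// For example:
//   (v8i16 (concat_vectors (v4i16 (extract_subvector v8i16:a, 0)),
//                          (v4i16 (extract_subvector v8i16:b, 4))))
//   -> (v8i16 (vector_shuffle<0,1,2,3,12,13,14,15> a, b))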
23897 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23898 EVT VT = N->getValueType(0);
23899 EVT OpVT = N->getOperand(0).getValueType();
23900
23901 // We currently can't generate an appropriate shuffle for a scalable vector.
23902 if (VT.isScalableVector())
23903 return SDValue();
23904
23905 int NumElts = VT.getVectorNumElements();
23906 int NumOpElts = OpVT.getVectorNumElements();
23907
23908 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23909 SmallVector<int, 8> Mask;
23910
23911 for (SDValue Op : N->ops()) {
23912 Op = peekThroughBitcasts(Op);
23913
23914 // UNDEF nodes convert to UNDEF shuffle mask values.
23915 if (Op.isUndef()) {
23916 Mask.append((unsigned)NumOpElts, -1);
23917 continue;
23918 }
23919
23920 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23921 return SDValue();
23922
23923 // What vector are we extracting the subvector from and at what index?
23924 SDValue ExtVec = Op.getOperand(0);
23925 int ExtIdx = Op.getConstantOperandVal(1);
23926
23927 // We want the EVT of the original extraction to correctly scale the
23928 // extraction index.
23929 EVT ExtVT = ExtVec.getValueType();
23930 ExtVec = peekThroughBitcasts(ExtVec);
23931
23932 // UNDEF nodes convert to UNDEF shuffle mask values.
23933 if (ExtVec.isUndef()) {
23934 Mask.append((unsigned)NumOpElts, -1);
23935 continue;
23936 }
23937
23938 // Ensure that we are extracting a subvector from a vector the same
23939 // size as the result.
23940 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23941 return SDValue();
23942
23943 // Scale the subvector index to account for any bitcast.
23944 int NumExtElts = ExtVT.getVectorNumElements();
23945 if (0 == (NumExtElts % NumElts))
23946 ExtIdx /= (NumExtElts / NumElts);
23947 else if (0 == (NumElts % NumExtElts))
23948 ExtIdx *= (NumElts / NumExtElts);
23949 else
23950 return SDValue();
23951
23952 // At most we can reference 2 inputs in the final shuffle.
23953 if (SV0.isUndef() || SV0 == ExtVec) {
23954 SV0 = ExtVec;
23955 for (int i = 0; i != NumOpElts; ++i)
23956 Mask.push_back(i + ExtIdx);
23957 } else if (SV1.isUndef() || SV1 == ExtVec) {
23958 SV1 = ExtVec;
23959 for (int i = 0; i != NumOpElts; ++i)
23960 Mask.push_back(i + ExtIdx + NumElts);
23961 } else {
23962 return SDValue();
23963 }
23964 }
23965
23966 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23967 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23968 DAG.getBitcast(VT, SV1), Mask, DAG);
23969}
23970
23971 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23972 unsigned CastOpcode = N->getOperand(0).getOpcode();
23973 switch (CastOpcode) {
23974 case ISD::SINT_TO_FP:
23975 case ISD::UINT_TO_FP:
23976 case ISD::FP_TO_SINT:
23977 case ISD::FP_TO_UINT:
23978 // TODO: Allow more opcodes?
23979 // case ISD::BITCAST:
23980 // case ISD::TRUNCATE:
23981 // case ISD::ZERO_EXTEND:
23982 // case ISD::SIGN_EXTEND:
23983 // case ISD::FP_EXTEND:
23984 break;
23985 default:
23986 return SDValue();
23987 }
23988
23989 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23990 if (!SrcVT.isVector())
23991 return SDValue();
23992
23993 // All operands of the concat must be the same kind of cast from the same
23994 // source type.
23995 SmallVector<SDValue, 4> SrcOps;
23996 for (SDValue Op : N->ops()) {
23997 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23998 Op.getOperand(0).getValueType() != SrcVT)
23999 return SDValue();
24000 SrcOps.push_back(Op.getOperand(0));
24001 }
24002
24003 // The wider cast must be supported by the target. This is unusual because
24004 // the operation support type parameter depends on the opcode. In addition,
24005 // check the other type in the cast to make sure this is really legal.
24006 EVT VT = N->getValueType(0);
24007 EVT SrcEltVT = SrcVT.getVectorElementType();
24008 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
24009 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
24010 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24011 switch (CastOpcode) {
24012 case ISD::SINT_TO_FP:
24013 case ISD::UINT_TO_FP:
24014 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
24015 !TLI.isTypeLegal(VT))
24016 return SDValue();
24017 break;
24018 case ISD::FP_TO_SINT:
24019 case ISD::FP_TO_UINT:
24020 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
24021 !TLI.isTypeLegal(ConcatSrcVT))
24022 return SDValue();
24023 break;
24024 default:
24025 llvm_unreachable("Unexpected cast opcode");
24026 }
24027
24028 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
24029 SDLoc DL(N);
24030 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
24031 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
24032}
24033
24034// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
24035// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
24036// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
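// e.g. (illustrative, assuming a v2f64 X concatenated into a v4f64):
//   concat_vectors(shuffle(X, undef, <1,0>), X)
//   --> shuffle(concat_vectors(X, undef), undef, <1,0,0,1>)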
24037 static SDValue combineConcatVectorOfShuffleAndItsOperands(
24038 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24039 bool LegalOperations) {
24040 EVT VT = N->getValueType(0);
24041 EVT OpVT = N->getOperand(0).getValueType();
24042 if (VT.isScalableVector())
24043 return SDValue();
24044
24045 // For now, only allow simple 2-operand concatenations.
24046 if (N->getNumOperands() != 2)
24047 return SDValue();
24048
24049 // Don't create illegal types/shuffles when not allowed to.
24050 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
24051 (LegalOperations &&
24052 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
24053 return SDValue();
24054
24055 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
24056 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
24057 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
24058 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
24059 // (4) and for now, the SHUFFLE_VECTOR must be unary.
24060 ShuffleVectorSDNode *SVN = nullptr;
24061 for (SDValue Op : N->ops()) {
24062 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
24063 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
24064 all_of(N->ops(), [CurSVN](SDValue Op) {
24065 // FIXME: can we allow UNDEF operands?
24066 return !Op.isUndef() &&
24067 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
24068 })) {
24069 SVN = CurSVN;
24070 break;
24071 }
24072 }
24073 if (!SVN)
24074 return SDValue();
24075
24076 // We are going to pad the shuffle operands, so any index that was picking
24077 // from the second operand must be adjusted.
24078 SmallVector<int, 16> AdjustedMask;
24079 AdjustedMask.reserve(SVN->getMask().size());
24080 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
24081 append_range(AdjustedMask, SVN->getMask());
24082
24083 // Identity masks for the operands of the (padded) shuffle.
24084 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
24085 MutableArrayRef<int> FirstShufOpIdentityMask =
24086 MutableArrayRef<int>(IdentityMask)
24087 .take_front(OpVT.getVectorNumElements());
24088 MutableArrayRef<int> SecondShufOpIdentityMask =
24089 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
24090 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24091 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24092 VT.getVectorNumElements());
24093
24094 // New combined shuffle mask.
24095 SmallVector<int, 32> Mask;
24096 Mask.reserve(VT.getVectorNumElements());
24097 for (SDValue Op : N->ops()) {
24098 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24099 if (Op.getNode() == SVN) {
24100 append_range(Mask, AdjustedMask);
24101 continue;
24102 }
24103 if (Op == SVN->getOperand(0)) {
24104 append_range(Mask, FirstShufOpIdentityMask);
24105 continue;
24106 }
24107 if (Op == SVN->getOperand(1)) {
24108 append_range(Mask, SecondShufOpIdentityMask);
24109 continue;
24110 }
24111 llvm_unreachable("Unexpected operand!");
24112 }
24113
24114 // Don't create illegal shuffle masks.
24115 if (!TLI.isShuffleMaskLegal(Mask, VT))
24116 return SDValue();
24117
24118 // Pad the shuffle operands with UNDEF.
24119 SDLoc dl(N);
24120 std::array<SDValue, 2> ShufOps;
24121 for (auto I : zip(SVN->ops(), ShufOps)) {
24122 SDValue ShufOp = std::get<0>(I);
24123 SDValue &NewShufOp = std::get<1>(I);
24124 if (ShufOp.isUndef())
24125 NewShufOp = DAG.getUNDEF(VT);
24126 else {
24127 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24128 DAG.getUNDEF(OpVT));
24129 ShufOpParts[0] = ShufOp;
24130 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24131 }
24132 }
24133 // Finally, create the new wide shuffle.
24134 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24135}
24136
24137SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24138 // If we only have one input vector, we don't need to do any concatenation.
24139 if (N->getNumOperands() == 1)
24140 return N->getOperand(0);
24141
24142 // Check if all of the operands are undefs.
24143 EVT VT = N->getValueType(0);
24144 if (ISD::allOperandsUndef(N))
24145 return DAG.getUNDEF(VT);
24146
24147 // Optimize concat_vectors where all but the first of the vectors are undef.
24148 if (all_of(drop_begin(N->ops()),
24149 [](const SDValue &Op) { return Op.isUndef(); })) {
24150 SDValue In = N->getOperand(0);
24151 assert(In.getValueType().isVector() && "Must concat vectors");
24152
24153 // If the input is a concat_vectors, just make a larger concat by padding
24154 // with smaller undefs.
24155 //
24156 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24157 // here could cause an infinite loop. That legalizing happens when LegalDAG
24158 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24159 // scalable.
24160 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24161 !(LegalDAG && In.getValueType().isScalableVector())) {
24162 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24163 SmallVector<SDValue, 4> Ops(In->ops());
24164 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24165 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24166 }
24167
24168 SDValue Scalar = peekThroughOneUseBitcasts(In);
24169
24170 // concat_vectors(scalar_to_vector(scalar), undef) ->
24171 // scalar_to_vector(scalar)
24172 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24173 Scalar.hasOneUse()) {
24174 EVT SVT = Scalar.getValueType().getVectorElementType();
24175 if (SVT == Scalar.getOperand(0).getValueType())
24176 Scalar = Scalar.getOperand(0);
24177 }
24178
24179 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24180 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24181 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24182 // look through the trunc so we can still do the transform:
24183 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24184 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24185 !TLI.isTypeLegal(Scalar.getValueType()) &&
24186 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24187 Scalar = Scalar->getOperand(0);
24188
24189 EVT SclTy = Scalar.getValueType();
24190
24191 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24192 return SDValue();
24193
24194 // Bail out if the vector size is not a multiple of the scalar size.
24195 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24196 return SDValue();
24197
24198 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24199 if (VNTNumElms < 2)
24200 return SDValue();
24201
24202 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24203 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24204 return SDValue();
24205
24206 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24207 return DAG.getBitcast(VT, Res);
24208 }
24209 }
24210
24211 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24212 // We have already tested above for an UNDEF only concatenation.
24213 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24214 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24215 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24216 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24217 };
24218 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24219 SmallVector<SDValue, 8> Opnds;
24220 EVT SVT = VT.getScalarType();
24221
24222 EVT MinVT = SVT;
24223 if (!SVT.isFloatingPoint()) {
24224 // If the BUILD_VECTORs are built from integers, they may have different
24225 // operand types. Get the smallest type and truncate all operands to it.
24226 bool FoundMinVT = false;
24227 for (const SDValue &Op : N->ops())
24228 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24229 EVT OpSVT = Op.getOperand(0).getValueType();
24230 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24231 FoundMinVT = true;
24232 }
24233 assert(FoundMinVT && "Concat vector type mismatch");
24234 }
24235
24236 for (const SDValue &Op : N->ops()) {
24237 EVT OpVT = Op.getValueType();
24238 unsigned NumElts = OpVT.getVectorNumElements();
24239
24240 if (ISD::UNDEF == Op.getOpcode())
24241 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24242
24243 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24244 if (SVT.isFloatingPoint()) {
24245 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24246 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24247 } else {
24248 for (unsigned i = 0; i != NumElts; ++i)
24249 Opnds.push_back(
24250 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24251 }
24252 }
24253 }
24254
24255 assert(VT.getVectorNumElements() == Opnds.size() &&
24256 "Concat vector type mismatch");
24257 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24258 }
24259
24260 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24261 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24262 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24263 return V;
24264
24265 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24266 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24267 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24268 return V;
24269
24270 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24271 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24272 return V;
24273 }
24274
24275 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24276 return V;
24277
24278 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24279 N, DAG, TLI, LegalTypes, LegalOperations))
24280 return V;
24281
24282 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24283 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24284 // operands and look for CONCAT operations that place the incoming vectors
24285 // at the exact same location.
24286 //
24287 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
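// e.g. (illustrative, assuming X is v8i32 and each extract is v4i32):
//   concat_vectors(extract_subvector(X,0), extract_subvector(X,4)) --> X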
24288 SDValue SingleSource = SDValue();
24289 unsigned PartNumElem =
24290 N->getOperand(0).getValueType().getVectorMinNumElements();
24291
24292 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24293 SDValue Op = N->getOperand(i);
24294
24295 if (Op.isUndef())
24296 continue;
24297
24298 // Check if this is the identity extract:
24299 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24300 return SDValue();
24301
24302 // Find the single incoming vector for the extract_subvector.
24303 if (SingleSource.getNode()) {
24304 if (Op.getOperand(0) != SingleSource)
24305 return SDValue();
24306 } else {
24307 SingleSource = Op.getOperand(0);
24308
24309 // Check the source type is the same as the type of the result.
24310 // If not, this concat may extend the vector, so we can not
24311 // optimize it away.
24312 if (SingleSource.getValueType() != N->getValueType(0))
24313 return SDValue();
24314 }
24315
24316 // Check that we are reading from the identity index.
24317 unsigned IdentityIndex = i * PartNumElem;
24318 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24319 return SDValue();
24320 }
24321
24322 if (SingleSource.getNode())
24323 return SingleSource;
24324
24325 return SDValue();
24326}
24327
24328// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24329// if the subvector can be sourced for free.
24330 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24331 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24332 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24333 return V.getOperand(1);
24334 }
24335 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24336 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24337 V.getOperand(0).getValueType() == SubVT &&
24338 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24339 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24340 return V.getOperand(SubIdx);
24341 }
24342 return SDValue();
24343}
24344
24345 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24346 SelectionDAG &DAG,
24347 bool LegalOperations) {
24348 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24349 SDValue BinOp = Extract->getOperand(0);
24350 unsigned BinOpcode = BinOp.getOpcode();
24351 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24352 return SDValue();
24353
24354 EVT VecVT = BinOp.getValueType();
24355 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24356 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24357 return SDValue();
24358
24359 SDValue Index = Extract->getOperand(1);
24360 EVT SubVT = Extract->getValueType(0);
24361 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24362 return SDValue();
24363
24364 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24365 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24366
24367 // TODO: We could handle the case where only 1 operand is being inserted by
24368 // creating an extract of the other operand, but that requires checking
24369 // number of uses and/or costs.
24370 if (!Sub0 || !Sub1)
24371 return SDValue();
24372
24373 // We are inserting both operands of the wide binop only to extract back
24374 // to the narrow vector size. Eliminate all of the insert/extract:
24375 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24376 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24377 BinOp->getFlags());
24378}
24379
24380/// If we are extracting a subvector produced by a wide binary operator try
24381/// to use a narrow binary operator and/or avoid concatenation and extraction.
24382 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24383 bool LegalOperations) {
24384 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24385 // some of these bailouts with other transforms.
24386
24387 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24388 return V;
24389
24390 // The extract index must be a constant, so we can map it to a concat operand.
24391 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24392 if (!ExtractIndexC)
24393 return SDValue();
24394
24395 // We are looking for an optionally bitcasted wide vector binary operator
24396 // feeding an extract subvector.
24397 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24398 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24399 unsigned BOpcode = BinOp.getOpcode();
24400 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24401 return SDValue();
24402
24403 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24404 // reduced to the unary fneg when it is visited, and we probably want to deal
24405 // with fneg in a target-specific way.
24406 if (BOpcode == ISD::FSUB) {
24407 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24408 if (C && C->getValueAPF().isNegZero())
24409 return SDValue();
24410 }
24411
24412 // The binop must be a vector type, so we can extract some fraction of it.
24413 EVT WideBVT = BinOp.getValueType();
24414 // The optimisations below currently assume we are dealing with fixed length
24415 // vectors. It is possible to add support for scalable vectors, but at the
24416 // moment we've done no analysis to prove whether they are profitable or not.
24417 if (!WideBVT.isFixedLengthVector())
24418 return SDValue();
24419
24420 EVT VT = Extract->getValueType(0);
24421 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24422 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24423 "Extract index is not a multiple of the vector length.");
24424
24425 // Bail out if this is not a proper multiple width extraction.
24426 unsigned WideWidth = WideBVT.getSizeInBits();
24427 unsigned NarrowWidth = VT.getSizeInBits();
24428 if (WideWidth % NarrowWidth != 0)
24429 return SDValue();
24430
24431 // Bail out if we are extracting a fraction of a single operation. This can
24432 // occur because we potentially looked through a bitcast of the binop.
24433 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24434 unsigned WideNumElts = WideBVT.getVectorNumElements();
24435 if (WideNumElts % NarrowingRatio != 0)
24436 return SDValue();
24437
24438 // Bail out if the target does not support a narrower version of the binop.
24439 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24440 WideNumElts / NarrowingRatio);
24441 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24442 LegalOperations))
24443 return SDValue();
24444
24445 // If extraction is cheap, we don't need to look at the binop operands
24446 // for concat ops. The narrow binop alone makes this transform profitable.
24447 // We can't just reuse the original extract index operand because we may have
24448 // bitcasted.
24449 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24450 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24451 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24452 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24453 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24454 SDLoc DL(Extract);
24455 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24456 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24457 BinOp.getOperand(0), NewExtIndex);
24458 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24459 BinOp.getOperand(1), NewExtIndex);
24460 SDValue NarrowBinOp =
24461 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24462 return DAG.getBitcast(VT, NarrowBinOp);
24463 }
24464
24465 // Only handle the case where we are doubling and then halving. A larger ratio
24466 // may require more than two narrow binops to replace the wide binop.
24467 if (NarrowingRatio != 2)
24468 return SDValue();
24469
24470 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24471 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24472 // flavors, but no other 256-bit integer support. This could be extended to
24473 // handle any binop, but that may require fixing/adding other folds to avoid
24474 // codegen regressions.
24475 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24476 return SDValue();
24477
24478 // We need at least one concatenation operation of a binop operand to make
24479 // this transform worthwhile. The concat must double the input vector sizes.
24480 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24481 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24482 return V.getOperand(ConcatOpNum);
24483 return SDValue();
24484 };
24485 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24486 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24487
24488 if (SubVecL || SubVecR) {
24489 // If a binop operand was not the result of a concat, we must extract a
24490 // half-sized operand for our new narrow binop:
24491 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24492 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24493 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24494 SDLoc DL(Extract);
24495 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24496 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24497 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24498 BinOp.getOperand(0), IndexC);
24499
24500 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24501 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24502 BinOp.getOperand(1), IndexC);
24503
24504 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24505 return DAG.getBitcast(VT, NarrowBinOp);
24506 }
24507
24508 return SDValue();
24509}
24510
24511/// If we are extracting a subvector from a wide vector load, convert to a
24512/// narrow load to eliminate the extraction:
24513/// (extract_subvector (load wide vector)) --> (load narrow vector)
24514 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24515 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24516 if (DAG.getDataLayout().isBigEndian())
24517 return SDValue();
24518
24519 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24520 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24521 return SDValue();
24522
24523 // Allow targets to opt-out.
24524 EVT VT = Extract->getValueType(0);
24525
24526 // We can only create byte sized loads.
24527 if (!VT.isByteSized())
24528 return SDValue();
24529
24530 unsigned Index = Extract->getConstantOperandVal(1);
24531 unsigned NumElts = VT.getVectorMinNumElements();
24532 // A fixed length vector being extracted from a scalable vector
24533 // may not be any *smaller* than the scalable one.
24534 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24535 return SDValue();
24536
24537 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24538 // multiple of the minimum number of elements in the result type.
24539 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24540 "multiple of the result's element count");
24541
24542 // It's fine to use TypeSize here as we know the offset will not be negative.
24543 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24544
24545 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24546 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24547 return SDValue();
24548
24549 // The narrow load will be offset from the base address of the old load if
24550 // we are extracting from something besides index 0 (little-endian).
24551 SDLoc DL(Extract);
24552
24553 // TODO: Use "BaseIndexOffset" to make this more effective.
24554 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24555
24558 MachineMemOperand *MMO;
24559 if (Offset.isScalable()) {
24560 MachinePointerInfo MPI =
24561 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24562 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24563 } else
24564 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24565 StoreSize);
24566
24567 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24568 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24569 return NewLd;
24570}
24571
24572/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24573/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24574/// EXTRACT_SUBVECTOR(Op?, ?),
24575/// Mask'))
24576/// iff it is legal and profitable to do so. Notably, the trimmed mask
24577/// (containing only the elements that are extracted)
24578/// must reference at most two subvectors.
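/// e.g. (illustrative, assuming v8i32 inputs X and Y and a v4i32 extract):
///   extract_subvector (shuffle X, Y, <0,1,8,9,u,u,u,u>), 0
///   --> shuffle (extract_subvector X, 0), (extract_subvector Y, 0), <0,1,4,5>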
24579 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24580 SelectionDAG &DAG,
24581 const TargetLowering &TLI,
24582 bool LegalOperations) {
24583 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24584 "Must only be called on EXTRACT_SUBVECTOR's");
24585
24586 SDValue N0 = N->getOperand(0);
24587
24588 // Only deal with non-scalable vectors.
24589 EVT NarrowVT = N->getValueType(0);
24590 EVT WideVT = N0.getValueType();
24591 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24592 return SDValue();
24593
24594 // The operand must be a shufflevector.
24595 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24596 if (!WideShuffleVector)
24597 return SDValue();
24598
24599 // The old shuffle needs to go away.
24600 if (!WideShuffleVector->hasOneUse())
24601 return SDValue();
24602
24603 // And the narrow shufflevector that we'll form must be legal.
24604 if (LegalOperations &&
24605 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24606 return SDValue();
24607
24608 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24609 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24610 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24611 "Extract index is not a multiple of the output vector length.");
24612
24613 int WideNumElts = WideVT.getVectorNumElements();
24614
24615 SmallVector<int, 16> NewMask;
24616 NewMask.reserve(NumEltsExtracted);
24617 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24618 DemandedSubvectors;
24619
24620 // Try to decode the wide mask into narrow mask from at most two subvectors.
24621 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24622 NumEltsExtracted)) {
24623 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24624 "Out-of-bounds shuffle mask?");
24625
24626 if (M < 0) {
24627 // Does not depend on operands, does not require adjustment.
24628 NewMask.emplace_back(M);
24629 continue;
24630 }
24631
24632 // From which operand of the shuffle does this shuffle mask element pick?
24633 int WideShufOpIdx = M / WideNumElts;
24634 // Which element of that operand is picked?
24635 int OpEltIdx = M % WideNumElts;
24636
24637 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24638 "Shuffle mask vector decomposition failure.");
24639
24640 // And which NumEltsExtracted-sized subvector of that operand is that?
24641 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24642 // And which element within that subvector of that operand is that?
24643 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24644
24645 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24646 "Shuffle mask subvector decomposition failure.");
24647
24648 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24649 WideShufOpIdx * WideNumElts) == M &&
24650 "Shuffle mask full decomposition failure.");
24651
24652 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24653
24654 if (Op.isUndef()) {
24655 // Picking from an undef operand. Let's adjust mask instead.
24656 NewMask.emplace_back(-1);
24657 continue;
24658 }
24659
24660 const std::pair<SDValue, int> DemandedSubvector =
24661 std::make_pair(Op, OpSubvecIdx);
24662
24663 if (DemandedSubvectors.insert(DemandedSubvector)) {
24664 if (DemandedSubvectors.size() > 2)
24665 return SDValue(); // We can't handle more than two subvectors.
24666 // How many elements into the WideVT does this subvector start?
24667 int Index = NumEltsExtracted * OpSubvecIdx;
24668 // Bail out if the extraction isn't going to be cheap.
24669 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24670 return SDValue();
24671 }
24672
24673 // Ok, but from which operand of the new shuffle will this element pick?
24674 int NewOpIdx =
24675 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24676 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24677
24678 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24679 NewMask.emplace_back(AdjM);
24680 }
24681 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24682 assert(DemandedSubvectors.size() <= 2 &&
24683 "Should have ended up demanding at most two subvectors.");
24684
24685 // Did we discover that the shuffle does not actually depend on operands?
24686 if (DemandedSubvectors.empty())
24687 return DAG.getUNDEF(NarrowVT);
24688
24689 // Profitability check: only deal with extractions from the first subvector
24690 // unless the mask becomes an identity mask.
24691 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24692 any_of(NewMask, [](int M) { return M < 0; }))
24693 for (auto &DemandedSubvector : DemandedSubvectors)
24694 if (DemandedSubvector.second != 0)
24695 return SDValue();
24696
24697 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24698 // operand[s]/index[es], so there is no point in checking its legality.
24699
24700 // Do not turn a legal shuffle into an illegal one.
24701 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24702 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24703 return SDValue();
24704
24705 SDLoc DL(N);
24706
24708 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24709 &DemandedSubvector : DemandedSubvectors) {
24710 // How many elements into the WideVT does this subvector start?
24711 int Index = NumEltsExtracted * DemandedSubvector.second;
24712 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24713 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24714 DemandedSubvector.first, IndexC));
24715 }
24716 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24717 "Should end up with either one or two ops");
24718
24719 // If we ended up with only one operand, pad with an undef.
24720 if (NewOps.size() == 1)
24721 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24722
24723 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24724}
24725
24726SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24727 EVT NVT = N->getValueType(0);
24728 SDValue V = N->getOperand(0);
24729 uint64_t ExtIdx = N->getConstantOperandVal(1);
24730 SDLoc DL(N);
24731
24732 // Extract from UNDEF is UNDEF.
24733 if (V.isUndef())
24734 return DAG.getUNDEF(NVT);
24735
24737 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24738 return NarrowLoad;
24739
24740 // Combine an extract of an extract into a single extract_subvector.
24741 // ext (ext X, C), 0 --> ext X, C
24742 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24743 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24744 V.getConstantOperandVal(1)) &&
24745 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24746 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24747 V.getOperand(1));
24748 }
24749 }
24750
24751 // ty1 extract_subvector(ty2 splat(V)) -> ty1 splat(V)
24752 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24753 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24754 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24755 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24756
24757 // extract_subvector(insert_subvector(x,y,c1),c2)
24758 // --> extract_subvector(y,c2-c1)
24759 // iff we're just extracting from the inserted subvector.
24760 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24761 SDValue InsSub = V.getOperand(1);
24762 EVT InsSubVT = InsSub.getValueType();
24763 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24764 unsigned InsIdx = V.getConstantOperandVal(2);
24765 unsigned NumSubElts = NVT.getVectorMinNumElements();
24766 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24767 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24768 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24769 V.getValueType().isFixedLengthVector())
24770 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24771 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24772 }
24773
24774 // Try to move vector bitcast after extract_subv by scaling extraction index:
24775 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24776 if (V.getOpcode() == ISD::BITCAST &&
24777 V.getOperand(0).getValueType().isVector() &&
24778 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24779 SDValue SrcOp = V.getOperand(0);
24780 EVT SrcVT = SrcOp.getValueType();
24781 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24782 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24783 if ((SrcNumElts % DestNumElts) == 0) {
24784 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24785 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24786 EVT NewExtVT =
24787 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24788 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24789 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24790 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24791 V.getOperand(0), NewIndex);
24792 return DAG.getBitcast(NVT, NewExtract);
24793 }
24794 }
24795 if ((DestNumElts % SrcNumElts) == 0) {
24796 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24797 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24798 ElementCount NewExtEC =
24799 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24800 EVT ScalarVT = SrcVT.getScalarType();
24801 if ((ExtIdx % DestSrcRatio) == 0) {
24802 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24803 EVT NewExtVT =
24804 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24805 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24806 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24807 SDValue NewExtract =
24808 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24809 V.getOperand(0), NewIndex);
24810 return DAG.getBitcast(NVT, NewExtract);
24811 }
24812 if (NewExtEC.isScalar() &&
24813 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24814 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24815 SDValue NewExtract =
24816 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24817 V.getOperand(0), NewIndex);
24818 return DAG.getBitcast(NVT, NewExtract);
24819 }
24820 }
24821 }
24822 }
24823 }
24824
24825 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24826 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24827 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24828 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24829 "Concat and extract subvector do not change element type");
24830 assert((ExtIdx % ExtNumElts) == 0 &&
24831 "Extract index is not a multiple of the input vector length.");
24832
24833 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24834 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24835
24836 // If the concatenated source types match this extract, it's a direct
24837 // simplification:
24838 // extract_subvec (concat V1, V2, ...), i --> Vi
24839 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24840 return V.getOperand(ConcatOpIdx);
24841
24842 // If the concatenated source vectors are a multiple length of this extract,
24843 // then extract a fraction of one of those source vectors directly from a
24844 // concat operand. Example:
24845 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
24846 // v2i8 extract_subvec v8i8 Y, 6
24847 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24848 ConcatSrcNumElts % ExtNumElts == 0) {
24849 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24850 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24851 "Trying to extract from >1 concat operand?");
24852 assert(NewExtIdx % ExtNumElts == 0 &&
24853 "Extract index is not a multiple of the input vector length.");
24854 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24855 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24856 V.getOperand(ConcatOpIdx), NewIndexC);
24857 }
24858 }
24859
24860 if (SDValue V =
24861 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24862 return V;
24863
24864 V = peekThroughBitcasts(V);
24865
24866 // If the input is a build vector, try to make a smaller build vector.
24867 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24868 EVT InVT = V.getValueType();
24869 unsigned ExtractSize = NVT.getSizeInBits();
24870 unsigned EltSize = InVT.getScalarSizeInBits();
24871 // Only do this if we won't split any elements.
24872 if (ExtractSize % EltSize == 0) {
24873 unsigned NumElems = ExtractSize / EltSize;
24874 EVT EltVT = InVT.getVectorElementType();
24875 EVT ExtractVT =
24876 NumElems == 1 ? EltVT
24877 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24878 if ((Level < AfterLegalizeDAG ||
24879 (NumElems == 1 ||
24880 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24881 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24882 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24883
24884 if (NumElems == 1) {
24885 SDValue Src = V->getOperand(IdxVal);
24886 if (EltVT != Src.getValueType())
24887 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24888 return DAG.getBitcast(NVT, Src);
24889 }
24890
24891 // Extract the pieces from the original build_vector.
24892 SDValue BuildVec =
24893 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24894 return DAG.getBitcast(NVT, BuildVec);
24895 }
24896 }
24897 }
24898
24899 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24900 // Handle only simple case where vector being inserted and vector
24901 // being extracted are of same size.
24902 EVT SmallVT = V.getOperand(1).getValueType();
24903 if (!NVT.bitsEq(SmallVT))
24904 return SDValue();
24905
24906 // Combine:
24907 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24908 // Into:
24909 // indices are equal or bit offsets are equal => V1
24910 // otherwise => (extract_subvec V1, ExtIdx)
24911 uint64_t InsIdx = V.getConstantOperandVal(2);
24912 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24913 ExtIdx * NVT.getScalarSizeInBits()) {
24914 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24915 return SDValue();
24916
24917 return DAG.getBitcast(NVT, V.getOperand(1));
24918 }
24919 return DAG.getNode(
24920 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24921 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24922 N->getOperand(1));
24923 }
24924
24925 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24926 return NarrowBOp;
24927
24928 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24929 return SDValue(N, 0);
24930
24931 return SDValue();
24932}
24933
24934/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24935/// followed by concatenation. Narrow vector ops may have better performance
24936/// than wide ops, and this can unlock further narrowing of other vector ops.
24937/// Targets can invert this transform later if it is not profitable.
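/// e.g. (illustrative, assuming v4i32 halves X and Y in v8i32 concats):
///   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,u,u,u,u>
///   --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <u,u,u,u>)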
24938 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24939 SelectionDAG &DAG) {
24940 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24941 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24942 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24943 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24944 return SDValue();
24945
24946 // Split the wide shuffle mask into halves. Any mask element that is accessing
24947 // operand 1 is offset down to account for narrowing of the vectors.
24948 ArrayRef<int> Mask = Shuf->getMask();
24949 EVT VT = Shuf->getValueType(0);
24950 unsigned NumElts = VT.getVectorNumElements();
24951 unsigned HalfNumElts = NumElts / 2;
24952 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24953 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24954 for (unsigned i = 0; i != NumElts; ++i) {
24955 if (Mask[i] == -1)
24956 continue;
24957 // If we reference the upper (undef) subvector then the element is undef.
24958 if ((Mask[i] % NumElts) >= HalfNumElts)
24959 continue;
24960 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24961 if (i < HalfNumElts)
24962 Mask0[i] = M;
24963 else
24964 Mask1[i - HalfNumElts] = M;
24965 }
24966
24967 // Ask the target if this is a valid transform.
24968 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24969 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24970 HalfNumElts);
24971 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24972 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24973 return SDValue();
24974
24975 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24976 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24977 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24978 SDLoc DL(Shuf);
24979 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24980 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24981 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24982}
24983
24984// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24985// or turn a shuffle of a single concat into simpler shuffle then concat.
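// e.g. (illustrative, with v2i32 concat operands A,B and C,D):
//   shuffle (concat A, B), (concat C, D), <4,5,2,3> --> concat C, B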
24986 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24987 EVT VT = N->getValueType(0);
24988 unsigned NumElts = VT.getVectorNumElements();
24989
24990 SDValue N0 = N->getOperand(0);
24991 SDValue N1 = N->getOperand(1);
24992 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24993 ArrayRef<int> Mask = SVN->getMask();
24994
24995 SmallVector<SDValue, 4> Ops;
24996 EVT ConcatVT = N0.getOperand(0).getValueType();
24997 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24998 unsigned NumConcats = NumElts / NumElemsPerConcat;
24999
25000 auto IsUndefMaskElt = [](int i) { return i == -1; };
25001
25002 // Special case: shuffle(concat(A,B)) can be more efficiently represented
25003 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
25004 // half vector elements.
25005 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
25006 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
25007 IsUndefMaskElt)) {
25008 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
25009 N0.getOperand(1),
25010 Mask.slice(0, NumElemsPerConcat));
25011 N1 = DAG.getUNDEF(ConcatVT);
25012 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
25013 }
25014
25015 // Look at every vector that's inserted. We're looking for exact
25016 // subvector-sized copies from a concatenated vector
25017 for (unsigned I = 0; I != NumConcats; ++I) {
25018 unsigned Begin = I * NumElemsPerConcat;
25019 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
25020
25021 // Make sure we're dealing with a copy.
25022 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
25023 Ops.push_back(DAG.getUNDEF(ConcatVT));
25024 continue;
25025 }
25026
25027 int OpIdx = -1;
25028 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
25029 if (IsUndefMaskElt(SubMask[i]))
25030 continue;
25031 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
25032 return SDValue();
25033 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
25034 if (0 <= OpIdx && EltOpIdx != OpIdx)
25035 return SDValue();
25036 OpIdx = EltOpIdx;
25037 }
25038 assert(0 <= OpIdx && "Unknown concat_vectors op");
25039
25040 if (OpIdx < (int)N0.getNumOperands())
25041 Ops.push_back(N0.getOperand(OpIdx));
25042 else
25043 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
25044 }
25045
25046 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
25047}
25048
25049// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25050// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25051//
25052// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
25053// a simplification in some sense, but it isn't appropriate in general: some
25054// BUILD_VECTORs are substantially cheaper than others. The general case
25055// of a BUILD_VECTOR requires inserting each element individually (or
25056// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
25057// all constants is a single constant pool load. A BUILD_VECTOR where each
25058// element is identical is a splat. A BUILD_VECTOR where most of the operands
25059// are undef lowers to a small number of element insertions.
25060//
25061// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
25062// We don't fold shuffles where one side is a non-zero constant, and we don't
25063// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
25064// non-constant operands. This seems to work out reasonably well in practice.
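// e.g. (illustrative):
//   shuffle (build_vector A, B), (build_vector C, D), <0,3> --> build_vector A, D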
25065 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
25066 SelectionDAG &DAG,
25067 const TargetLowering &TLI) {
25068 EVT VT = SVN->getValueType(0);
25069 unsigned NumElts = VT.getVectorNumElements();
25070 SDValue N0 = SVN->getOperand(0);
25071 SDValue N1 = SVN->getOperand(1);
25072
25073 if (!N0->hasOneUse())
25074 return SDValue();
25075
25076 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
25077 // discussed above.
25078 if (!N1.isUndef()) {
25079 if (!N1->hasOneUse())
25080 return SDValue();
25081
25082 bool N0AnyConst = isAnyConstantBuildVector(N0);
25083 bool N1AnyConst = isAnyConstantBuildVector(N1);
25084 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
25085 return SDValue();
25086 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
25087 return SDValue();
25088 }
25089
25090 // If both inputs are splats of the same value then we can safely merge this
25091 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25092 bool IsSplat = false;
25093 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25094 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25095 if (BV0 && BV1)
25096 if (SDValue Splat0 = BV0->getSplatValue())
25097 IsSplat = (Splat0 == BV1->getSplatValue());
25098
25099 SmallVector<SDValue, 8> Ops;
25100 SmallSet<SDValue, 16> DuplicateOps;
25101 for (int M : SVN->getMask()) {
25102 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25103 if (M >= 0) {
25104 int Idx = M < (int)NumElts ? M : M - NumElts;
25105 SDValue &S = (M < (int)NumElts ? N0 : N1);
25106 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25107 Op = S.getOperand(Idx);
25108 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25109 SDValue Op0 = S.getOperand(0);
25110 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25111 } else {
25112 // Operand can't be combined - bail out.
25113 return SDValue();
25114 }
25115 }
25116
25117 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25118 // generating a splat; semantically, this is fine, but it's likely to
25119 // generate low-quality code if the target can't reconstruct an appropriate
25120 // shuffle.
25121 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25122 if (!IsSplat && !DuplicateOps.insert(Op).second)
25123 return SDValue();
25124
25125 Ops.push_back(Op);
25126 }
25127
25128 // BUILD_VECTOR requires all inputs to be of the same type, find the
25129 // maximum type and extend them all.
25130 EVT SVT = VT.getScalarType();
25131 if (SVT.isInteger())
25132 for (SDValue &Op : Ops)
25133 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25134 if (SVT != VT.getScalarType())
25135 for (SDValue &Op : Ops)
25136 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25137 : (TLI.isZExtFree(Op.getValueType(), SVT)
25138 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25139 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25140 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25141}
25142
25143// Match shuffles that can be converted to *_vector_extend_in_reg.
25144// This is often generated during legalization.
25145// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25146// and returns the EVT to which the extension should be performed.
25147// NOTE: this assumes that the src is the first operand of the shuffle.
25148 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25149 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25150 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25151 bool LegalOperations) {
25152 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25153
25154 // TODO Add support for big-endian when we have a test case.
25155 if (!VT.isInteger() || IsBigEndian)
25156 return std::nullopt;
25157
25158 unsigned NumElts = VT.getVectorNumElements();
25159 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25160
25161 // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
25162 // power-of-2 extensions as they are the most likely.
25163 // FIXME: should try Scale == NumElts case too,
25164 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25165 // The vector width must be a multiple of Scale.
25166 if (NumElts % Scale != 0)
25167 continue;
25168
25169 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25170 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25171
25172 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25173 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25174 continue;
25175
25176 if (Match(Scale))
25177 return OutVT;
25178 }
25179
25180 return std::nullopt;
25181}
25182
25183// Match shuffles that can be converted to any_vector_extend_in_reg.
25184// This is often generated during legalization.
25185// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25186 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25187 SelectionDAG &DAG,
25188 const TargetLowering &TLI,
25189 bool LegalOperations) {
25190 EVT VT = SVN->getValueType(0);
25191 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25192
25193 // TODO Add support for big-endian when we have a test case.
25194 if (!VT.isInteger() || IsBigEndian)
25195 return SDValue();
25196
25197 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25198 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25199 Mask = SVN->getMask()](unsigned Scale) {
25200 for (unsigned i = 0; i != NumElts; ++i) {
25201 if (Mask[i] < 0)
25202 continue;
25203 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25204 continue;
25205 return false;
25206 }
25207 return true;
25208 };
25209
25210 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25211 SDValue N0 = SVN->getOperand(0);
25212 // Never create an illegal type. Only create unsupported operations if we
25213 // are pre-legalization.
25214 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25215 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25216 if (!OutVT)
25217 return SDValue();
25218 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25219}
25220
25221// Match shuffles that can be converted to zero_extend_vector_inreg.
25222// This is often generated during legalization.
25223// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25224 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25225 SelectionDAG &DAG,
25226 const TargetLowering &TLI,
25227 bool LegalOperations) {
25228 bool LegalTypes = true;
25229 EVT VT = SVN->getValueType(0);
25230 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25231 unsigned NumElts = VT.getVectorNumElements();
25232 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25233
25234 // TODO: add support for big-endian when we have a test case.
25235 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25236 if (!VT.isInteger() || IsBigEndian)
25237 return SDValue();
25238
25239 SmallVector<int, 16> Mask(SVN->getMask());
25240 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25241 for (int &Indice : Mask) {
25242 if (Indice < 0)
25243 continue;
25244 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25245 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25246 Fn(Indice, OpIdx, OpEltIdx);
25247 }
25248 };
25249
25250 // Which elements of which operand does this shuffle demand?
25251 std::array<APInt, 2> OpsDemandedElts;
25252 for (APInt &OpDemandedElts : OpsDemandedElts)
25253 OpDemandedElts = APInt::getZero(NumElts);
25254 ForEachDecomposedIndice(
25255 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25256 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25257 });
25258
25259 // Element-wise(!), which of these demanded elements are known to be zero?
25260 std::array<APInt, 2> OpsKnownZeroElts;
25261 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25262 std::get<2>(I) =
25263 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25264
25265 // Manifest zeroable element knowledge in the shuffle mask.
25266 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25267 // this is a local invention, but it won't leak into DAG.
25268 // FIXME: should we not manifest them, but just check when matching?
25269 bool HadZeroableElts = false;
25270 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25271 int &Indice, int OpIdx, int OpEltIdx) {
25272 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25273 Indice = -2; // Zeroable element.
25274 HadZeroableElts = true;
25275 }
25276 });
25277
25278 // Don't proceed unless we've refined at least one zeroable mask index.
25279 // If we didn't, then we are still trying to match the same shuffle mask
25280 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25281 // and evidently failed. Proceeding will lead to endless combine loops.
25282 if (!HadZeroableElts)
25283 return SDValue();
25284
25285 // The shuffle may be more fine-grained than we want. Widen elements first.
25286 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25287 SmallVector<int, 16> ScaledMask;
25288 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25289 assert(Mask.size() >= ScaledMask.size() &&
25290 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25291 int Prescale = Mask.size() / ScaledMask.size();
25292
25293 NumElts = ScaledMask.size();
25294 EltSizeInBits *= Prescale;
25295
25296 EVT PrescaledVT = EVT::getVectorVT(
25297 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25298 NumElts);
25299
25300 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25301 return SDValue();
25302
25303 // For example,
25304 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25305 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25306 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25307 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25308 "Unexpected mask scaling factor.");
25309 ArrayRef<int> Mask = ScaledMask;
25310 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25311 SrcElt != NumSrcElts; ++SrcElt) {
25312 // Analyze the shuffle mask in Scale-sized chunks.
25313 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25314 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25315 Mask = Mask.drop_front(MaskChunk.size());
25316 // The first index in this chunk must be SrcElt, but not zero!
25317 // FIXME: undef should be fine, but that results in more-defined result.
25318 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25319 return false;
25320 // The rest of the indices in this chunk must be zeros.
25321 // FIXME: undef should be fine, but that results in more-defined result.
25322 if (!all_of(MaskChunk.drop_front(1),
25323 [](int Indice) { return Indice == -2; }))
25324 return false;
25325 }
25326 assert(Mask.empty() && "Did not process the whole mask?");
25327 return true;
25328 };
25329
25330 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25331 for (bool Commuted : {false, true}) {
25332 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25333 if (Commuted)
25334 ShuffleVectorSDNode::commuteMask(Mask);
25335 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25336 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25337 LegalOperations);
25338 if (OutVT)
25339 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25340 DAG.getBitcast(PrescaledVT, Op)));
25341 }
25342 return SDValue();
25343}
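// Illustrative sketch (plain C++, no LLVM types): the per-chunk test that the
// isZeroExtend lambda above performs, using -2 as the local "zeroable"
// sentinel. For Scale == 2 a mask like <0,-2,1,-2> passes, while <-2,-2,1,-2>
// fails because the first lane of the chunk does not carry the source element.
#include <cassert>
#include <vector>

static bool chunkedMaskIsZeroExtend(const std::vector<int> &Mask,
                                    unsigned Scale) {
  assert(Scale >= 2 && Mask.size() % Scale == 0 && "Unexpected mask scaling.");
  unsigned NumSrcElts = Mask.size() / Scale;
  for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
    // Chunk SrcElt occupies mask positions [SrcElt*Scale, (SrcElt+1)*Scale).
    if (Mask[SrcElt * Scale] != (int)SrcElt)
      return false; // Lowest lane must carry the source element itself.
    for (unsigned I = 1; I != Scale; ++I)
      if (Mask[SrcElt * Scale + I] != -2)
        return false; // All higher lanes of the chunk must be known zero.
  }
  return true;
}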
25344
25345// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25346// each source element of a large type into the lowest elements of a smaller
25347// destination type. This is often generated during legalization.
25348 // If the source node itself was a '*_extend_vector_inreg' node then we
25349 // should be able to remove it.
25350 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25351 SelectionDAG &DAG) {
25352 EVT VT = SVN->getValueType(0);
25353 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25354
25355 // TODO Add support for big-endian when we have a test case.
25356 if (!VT.isInteger() || IsBigEndian)
25357 return SDValue();
25358
25359 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25360
25361 unsigned Opcode = N0.getOpcode();
25362 if (!ISD::isExtVecInRegOpcode(Opcode))
25363 return SDValue();
25364
25365 SDValue N00 = N0.getOperand(0);
25366 ArrayRef<int> Mask = SVN->getMask();
25367 unsigned NumElts = VT.getVectorNumElements();
25368 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25369 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25370 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25371
25372 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25373 return SDValue();
25374 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25375
25376 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25377 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25378 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25379 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25380 for (unsigned i = 0; i != NumElts; ++i) {
25381 if (Mask[i] < 0)
25382 continue;
25383 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25384 continue;
25385 return false;
25386 }
25387 return true;
25388 };
25389
25390 // At the moment we just handle the case where we've truncated back to the
25391 // same size as before the extension.
25392 // TODO: handle more extension/truncation cases as cases arise.
25393 if (EltSizeInBits != ExtSrcSizeInBits)
25394 return SDValue();
25395
25396 // We can remove *extend_vector_inreg only if the truncation happens at
25397 // the same scale as the extension.
25398 if (isTruncate(ExtScale))
25399 return DAG.getBitcast(VT, N00);
25400
25401 return SDValue();
25402}
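// Illustrative sketch (plain C++, no LLVM types): the isTruncate check above.
// A mask is a "truncate at scale Scale" if every defined lane i selects source
// element i*Scale, e.g. <0,2,-1,-1> for Scale == 2.
#include <vector>

static bool maskIsTruncateAtScale(const std::vector<int> &Mask,
                                  unsigned Scale) {
  unsigned NumElts = Mask.size();
  for (unsigned I = 0; I != NumElts; ++I) {
    if (Mask[I] < 0)
      continue; // Undef lanes are ignored.
    if (I * Scale < NumElts && Mask[I] == (int)(I * Scale))
      continue; // Lane I takes the low part of wide element I.
    return false;
  }
  return true;
}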
25403
25404// Combine shuffles of splat-shuffles of the form:
25405// shuffle (shuffle V, undef, splat-mask), undef, M
25406// If splat-mask contains undef elements, we need to be careful about
25407 // introducing undefs in the folded mask which are not the result of composing
25408// the masks of the shuffles.
25409 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25410 SelectionDAG &DAG) {
25411 EVT VT = Shuf->getValueType(0);
25412 unsigned NumElts = VT.getVectorNumElements();
25413
25414 if (!Shuf->getOperand(1).isUndef())
25415 return SDValue();
25416
25417 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25418 // in disguise, with all demanded elements being identical.
25419 // FIXME: this can be done per-operand.
25420 if (!Shuf->isSplat()) {
25421 APInt DemandedElts(NumElts, 0);
25422 for (int Idx : Shuf->getMask()) {
25423 if (Idx < 0)
25424 continue; // Ignore sentinel indices.
25425 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25426 DemandedElts.setBit(Idx);
25427 }
25428 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25429 APInt UndefElts;
25430 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25431 // Even if all demanded elements are splat, some of them could be undef.
25432 // Which lowest demanded element is *not* known-undef?
25433 std::optional<unsigned> MinNonUndefIdx;
25434 for (int Idx : Shuf->getMask()) {
25435 if (Idx < 0 || UndefElts[Idx])
25436 continue; // Ignore sentinel indices, and undef elements.
25437 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25438 }
25439 if (!MinNonUndefIdx)
25440 return DAG.getUNDEF(VT); // All undef - result is undef.
25441 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25442 SmallVector<int, 8> SplatMask(Shuf->getMask());
25443 for (int &Idx : SplatMask) {
25444 if (Idx < 0)
25445 continue; // Passthrough sentinel indices.
25446 // Otherwise, just pick the lowest demanded non-undef element.
25447 // Or sentinel undef, if we know we'd pick a known-undef element.
25448 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25449 }
25450 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25451 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25452 Shuf->getOperand(1), SplatMask);
25453 }
25454 }
25455
25456 // If the inner operand is a known splat with no undefs, just return that directly.
25457 // TODO: Create DemandedElts mask from Shuf's mask.
25458 // TODO: Allow undef elements and merge with the shuffle code below.
25459 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25460 return Shuf->getOperand(0);
25461
25462 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25463 if (!Splat || !Splat->isSplat())
25464 return SDValue();
25465
25466 ArrayRef<int> ShufMask = Shuf->getMask();
25467 ArrayRef<int> SplatMask = Splat->getMask();
25468 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25469
25470 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25471 // every undef mask element in the splat-shuffle has a corresponding undef
25472 // element in the user-shuffle's mask or if the composition of mask elements
25473 // would result in undef.
25474 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25475 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25476 // In this case it is not legal to simplify to the splat-shuffle because we
25477 // may be exposing to the users of the shuffle an undef element at index 1
25478 // which was not there before the combine.
25479 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25480 // In this case the composition of masks yields SplatMask, so it's ok to
25481 // simplify to the splat-shuffle.
25482 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25483 // In this case the composed mask includes all undef elements of SplatMask
25484 // and in addition sets element zero to undef. It is safe to simplify to
25485 // the splat-shuffle.
25486 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25487 ArrayRef<int> SplatMask) {
25488 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25489 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25490 SplatMask[UserMask[i]] != -1)
25491 return false;
25492 return true;
25493 };
25494 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25495 return Shuf->getOperand(0);
25496
25497 // Create a new shuffle with a mask that is composed of the two shuffles'
25498 // masks.
25499 SmallVector<int, 32> NewMask;
25500 for (int Idx : ShufMask)
25501 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25502
25503 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25504 Splat->getOperand(0), Splat->getOperand(1),
25505 NewMask);
25506}
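// Illustrative sketch (plain C++, no LLVM types): the mask composition used by
// the NewMask loop above. E.g. UserMask=<3,-1,2,-1> over SplatMask=<2,-1,2,-1>
// composes to <-1,-1,2,-1>.
#include <vector>

static std::vector<int> composeWithSplatMask(const std::vector<int> &UserMask,
                                             const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  NewMask.reserve(UserMask.size());
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); // Peek through.
  return NewMask;
}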
25507
25508 // Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
25509// the mask can be treated as a larger type.
25510 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25511 SelectionDAG &DAG,
25512 const TargetLowering &TLI,
25513 bool LegalOperations) {
25514 SDValue Op0 = SVN->getOperand(0);
25515 SDValue Op1 = SVN->getOperand(1);
25516 EVT VT = SVN->getValueType(0);
25517 if (Op0.getOpcode() != ISD::BITCAST)
25518 return SDValue();
25519 EVT InVT = Op0.getOperand(0).getValueType();
25520 if (!InVT.isVector() ||
25521 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25522 Op1.getOperand(0).getValueType() != InVT)))
25523 return SDValue();
25524 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25525 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25526 return SDValue();
25527
25528 int VTLanes = VT.getVectorNumElements();
25529 int InLanes = InVT.getVectorNumElements();
25530 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25531 (LegalOperations &&
25532 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25533 return SDValue();
25534 int Factor = VTLanes / InLanes;
25535
25536 // Check that each group of lanes in the mask is either undef or makes a
25537 // valid mask for the wider lane type.
25538 ArrayRef<int> Mask = SVN->getMask();
25539 SmallVector<int> NewMask;
25540 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25541 return SDValue();
25542
25543 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25544 return SDValue();
25545
25546 // Create the new shuffle with the new mask and bitcast it back to the
25547 // original type.
25548 SDLoc DL(SVN);
25549 Op0 = Op0.getOperand(0);
25550 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25551 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25552 return DAG.getBitcast(VT, NewShuf);
25553}
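// Illustrative sketch (plain C++, assumptions noted): an approximation of what
// widenShuffleMaskElts has to establish before the bitcast shuffle above can
// be rewritten - each group of Factor narrow lanes must either be all undef or
// form one aligned, consecutive run, so it can be expressed as a single wide
// lane. The real helper handles partially-undef groups more carefully; this
// sketch simply rejects them.
#include <vector>

static bool widenMaskByFactor(unsigned Factor, const std::vector<int> &Mask,
                              std::vector<int> &WideMask) {
  WideMask.clear();
  for (size_t I = 0; I + Factor <= Mask.size(); I += Factor) {
    int Lead = Mask[I];
    if (Lead < 0) {
      // The whole group must be undef to map to one undef wide lane.
      for (unsigned J = 1; J != Factor; ++J)
        if (Mask[I + J] >= 0)
          return false;
      WideMask.push_back(-1);
      continue;
    }
    if (Lead % (int)Factor != 0)
      return false; // Group must start on a wide-element boundary.
    for (unsigned J = 1; J != Factor; ++J)
      if (Mask[I + J] != Lead + (int)J)
        return false; // Group must read consecutive narrow lanes.
    WideMask.push_back(Lead / (int)Factor);
  }
  return true;
}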
25554
25555/// Combine shuffle of shuffle of the form:
25556/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
25557 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25558 SelectionDAG &DAG) {
25559 if (!OuterShuf->getOperand(1).isUndef())
25560 return SDValue();
25561 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25562 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25563 return SDValue();
25564
25565 ArrayRef<int> OuterMask = OuterShuf->getMask();
25566 ArrayRef<int> InnerMask = InnerShuf->getMask();
25567 unsigned NumElts = OuterMask.size();
25568 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25569 SmallVector<int, 32> CombinedMask(NumElts, -1);
25570 int SplatIndex = -1;
25571 for (unsigned i = 0; i != NumElts; ++i) {
25572 // Undef lanes remain undef.
25573 int OuterMaskElt = OuterMask[i];
25574 if (OuterMaskElt == -1)
25575 continue;
25576
25577 // Peek through the shuffle masks to get the underlying source element.
25578 int InnerMaskElt = InnerMask[OuterMaskElt];
25579 if (InnerMaskElt == -1)
25580 continue;
25581
25582 // Initialize the splatted element.
25583 if (SplatIndex == -1)
25584 SplatIndex = InnerMaskElt;
25585
25586 // Non-matching index - this is not a splat.
25587 if (SplatIndex != InnerMaskElt)
25588 return SDValue();
25589
25590 CombinedMask[i] = InnerMaskElt;
25591 }
25592 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25593 getSplatIndex(CombinedMask) != -1) &&
25594 "Expected a splat mask");
25595
25596 // TODO: The transform may be a win even if the mask is not legal.
25597 EVT VT = OuterShuf->getValueType(0);
25598 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25599 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25600 return SDValue();
25601
25602 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25603 InnerShuf->getOperand(1), CombinedMask);
25604}
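// Illustrative sketch (plain C++, no LLVM types): the peek-through performed
// above. Returns the single source element that every defined lane of
// OuterMask reaches through InnerMask, or -1 if the composed result is not a
// splat (or is entirely undef).
#include <vector>

static int splatIndexThroughMasks(const std::vector<int> &OuterMask,
                                  const std::vector<int> &InnerMask) {
  int SplatIndex = -1;
  for (int OuterElt : OuterMask) {
    if (OuterElt == -1)
      continue; // Undef lanes stay undef.
    int InnerElt = InnerMask[OuterElt];
    if (InnerElt == -1)
      continue;
    if (SplatIndex == -1)
      SplatIndex = InnerElt; // First defined lane picks the candidate element.
    else if (SplatIndex != InnerElt)
      return -1; // Two different source elements: not a splat.
  }
  return SplatIndex;
}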
25605
25606/// If the shuffle mask is taking exactly one element from the first vector
25607/// operand and passing through all other elements from the second vector
25608/// operand, return the index of the mask element that is choosing an element
25609/// from the first operand. Otherwise, return -1.
25610 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25611 int MaskSize = Mask.size();
25612 int EltFromOp0 = -1;
25613 // TODO: This does not match if there are undef elements in the shuffle mask.
25614 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25615 // removing an instruction (a shuffle), but losing the knowledge that some
25616 // vector lanes are not needed.
25617 for (int i = 0; i != MaskSize; ++i) {
25618 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25619 // We're looking for a shuffle of exactly one element from operand 0.
25620 if (EltFromOp0 != -1)
25621 return -1;
25622 EltFromOp0 = i;
25623 } else if (Mask[i] != i + MaskSize) {
25624 // Nothing from operand 1 can change lanes.
25625 return -1;
25626 }
25627 }
25628 return EltFromOp0;
25629}
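// Illustrative worked example (comment only, not LLVM API): with 4 lanes, the
// mask <4,5,1,7> passes operand 1 through lanes 0, 1 and 3 and takes element 1
// of operand 0 into lane 2, so the helper above returns 2. A mask such as
// <4,1,1,7> picks two elements from operand 0 and returns -1, as does
// <4,6,1,7>, where an operand-1 element would change lanes.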
25630
25631/// If a shuffle inserts exactly one element from a source vector operand into
25632/// another vector operand and we can access the specified element as a scalar,
25633/// then we can eliminate the shuffle.
25634 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25635 SelectionDAG &DAG) {
25636 // First, check if we are taking one element of a vector and shuffling that
25637 // element into another vector.
25638 ArrayRef<int> Mask = Shuf->getMask();
25639 SmallVector<int, 16> CommutedMask(Mask);
25640 SDValue Op0 = Shuf->getOperand(0);
25641 SDValue Op1 = Shuf->getOperand(1);
25642 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25643 if (ShufOp0Index == -1) {
25644 // Commute mask and check again.
25645 ShuffleVectorSDNode::commuteMask(CommutedMask);
25646 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25647 if (ShufOp0Index == -1)
25648 return SDValue();
25649 // Commute operands to match the commuted shuffle mask.
25650 std::swap(Op0, Op1);
25651 Mask = CommutedMask;
25652 }
25653
25654 // The shuffle inserts exactly one element from operand 0 into operand 1.
25655 // Now see if we can access that element as a scalar via a real insert element
25656 // instruction.
25657 // TODO: We can try harder to locate the element as a scalar. Examples: it
25658 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25659 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25660 "Shuffle mask value must be from operand 0");
25661 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25662 return SDValue();
25663
25664 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25665 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25666 return SDValue();
25667
25668 // There's an existing insertelement with constant insertion index, so we
25669 // don't need to check the legality/profitability of a replacement operation
25670 // that differs at most in the constant value. The target should be able to
25671 // lower any of those in a similar way. If not, legalization will expand this
25672 // to a scalar-to-vector plus shuffle.
25673 //
25674 // Note that the shuffle may move the scalar from the position that the insert
25675 // element used. Therefore, our new insert element occurs at the shuffle's
25676 // mask index value, not the insert's index value.
25677 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25678 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25679 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25680 Op1, Op0.getOperand(1), NewInsIndex);
25681}
25682
25683/// If we have a unary shuffle of a shuffle, see if it can be folded away
25684/// completely. This has the potential to lose undef knowledge because the first
25685/// shuffle may not have an undef mask element where the second one does. So
25686/// only call this after doing simplifications based on demanded elements.
25687 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25688 // shuf (shuf0 X, Y, Mask0), undef, Mask
25689 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25690 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25691 return SDValue();
25692
25693 ArrayRef<int> Mask = Shuf->getMask();
25694 ArrayRef<int> Mask0 = Shuf0->getMask();
25695 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25696 // Ignore undef elements.
25697 if (Mask[i] == -1)
25698 continue;
25699 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25700
25701 // Is the element of the shuffle operand chosen by this shuffle the same as
25702 // the element chosen by the shuffle operand itself?
25703 if (Mask0[Mask[i]] != Mask0[i])
25704 return SDValue();
25705 }
25706 // Every element of this shuffle is identical to the result of the previous
25707 // shuffle, so we can replace this value.
25708 return Shuf->getOperand(0);
25709}
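// Illustrative sketch (plain C++, no LLVM types): the redundancy test above.
// The outer unary shuffle can be dropped when, for every defined lane i,
// re-shuffling by Mask selects an element that the inner shuffle had already
// placed in lane i.
#include <vector>

static bool outerShuffleIsRedundant(const std::vector<int> &Mask,
                                    const std::vector<int> &Mask0) {
  for (size_t I = 0; I != Mask.size(); ++I) {
    if (Mask[I] == -1)
      continue; // Undef lanes cannot invalidate the fold here.
    if (Mask0[Mask[I]] != Mask0[I])
      return false; // Lane I would change its source element.
  }
  return true;
}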
25710
25711SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25712 EVT VT = N->getValueType(0);
25713 unsigned NumElts = VT.getVectorNumElements();
25714
25715 SDValue N0 = N->getOperand(0);
25716 SDValue N1 = N->getOperand(1);
25717
25718 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25719
25720 // Canonicalize shuffle undef, undef -> undef
25721 if (N0.isUndef() && N1.isUndef())
25722 return DAG.getUNDEF(VT);
25723
25724 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25725
25726 // Canonicalize shuffle v, v -> v, undef
25727 if (N0 == N1)
25728 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25729 createUnaryMask(SVN->getMask(), NumElts));
25730
25731 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25732 if (N0.isUndef())
25733 return DAG.getCommutedVectorShuffle(*SVN);
25734
25735 // Remove references to rhs if it is undef
25736 if (N1.isUndef()) {
25737 bool Changed = false;
25738 SmallVector<int, 8> NewMask;
25739 for (unsigned i = 0; i != NumElts; ++i) {
25740 int Idx = SVN->getMaskElt(i);
25741 if (Idx >= (int)NumElts) {
25742 Idx = -1;
25743 Changed = true;
25744 }
25745 NewMask.push_back(Idx);
25746 }
25747 if (Changed)
25748 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25749 }
25750
25751 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25752 return InsElt;
25753
25754 // A shuffle of a single vector that is a splatted value can always be folded.
25755 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25756 return V;
25757
25758 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25759 return V;
25760
25761 // If it is a splat, check if the argument vector is another splat or a
25762 // build_vector.
25763 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25764 int SplatIndex = SVN->getSplatIndex();
25765 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25766 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25767 // splat (vector_bo L, R), Index -->
25768 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25769 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25770 SDLoc DL(N);
25771 EVT EltVT = VT.getScalarType();
25772 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25773 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25774 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25775 SDValue NewBO =
25776 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25777 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25778 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25779 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25780 }
25781
25782 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25783 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25784 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25785 N0.hasOneUse()) {
25786 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25787 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25788
25789 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25790 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25791 if (Idx->getAPIntValue() == SplatIndex)
25792 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25793
25794 // Look through a bitcast if LE and splatting lane 0, through to a
25795 // scalar_to_vector or a build_vector.
25796 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25797 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25798 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25799 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25800 EVT N00VT = N0.getOperand(0).getValueType();
25801 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25802 VT.isInteger() && N00VT.isInteger()) {
25803 EVT InVT =
25804 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25805 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25806 SDLoc(N), InVT);
25807 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25808 }
25809 }
25810 }
25811
25812 // If this is a bit convert that changes the element type of the vector but
25813 // not the number of vector elements, look through it. Be careful not to
25814 // look through conversions that change things like v4f32 to v2f64.
25815 SDNode *V = N0.getNode();
25816 if (V->getOpcode() == ISD::BITCAST) {
25817 SDValue ConvInput = V->getOperand(0);
25818 if (ConvInput.getValueType().isVector() &&
25819 ConvInput.getValueType().getVectorNumElements() == NumElts)
25820 V = ConvInput.getNode();
25821 }
25822
25823 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25824 assert(V->getNumOperands() == NumElts &&
25825 "BUILD_VECTOR has wrong number of operands");
25826 SDValue Base;
25827 bool AllSame = true;
25828 for (unsigned i = 0; i != NumElts; ++i) {
25829 if (!V->getOperand(i).isUndef()) {
25830 Base = V->getOperand(i);
25831 break;
25832 }
25833 }
25834 // Splat of <u, u, u, u>, return <u, u, u, u>
25835 if (!Base.getNode())
25836 return N0;
25837 for (unsigned i = 0; i != NumElts; ++i) {
25838 if (V->getOperand(i) != Base) {
25839 AllSame = false;
25840 break;
25841 }
25842 }
25843 // Splat of <x, x, x, x>, return <x, x, x, x>
25844 if (AllSame)
25845 return N0;
25846
25847 // Canonicalize any other splat as a build_vector.
25848 SDValue Splatted = V->getOperand(SplatIndex);
25849 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25850 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25851
25852 // We may have jumped through bitcasts, so the type of the
25853 // BUILD_VECTOR may not match the type of the shuffle.
25854 if (V->getValueType(0) != VT)
25855 NewBV = DAG.getBitcast(VT, NewBV);
25856 return NewBV;
25857 }
25858 }
25859
25860 // Simplify source operands based on shuffle mask.
25861 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25862 return SDValue(N, 0);
25863
25864 // This is intentionally placed after demanded elements simplification because
25865 // it could eliminate knowledge of undef elements created by this shuffle.
25866 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25867 return ShufOp;
25868
25869 // Match shuffles that can be converted to any_vector_extend_in_reg.
25870 if (SDValue V =
25871 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25872 return V;
25873
25874 // Combine "truncate_vector_in_reg" style shuffles.
25875 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25876 return V;
25877
25878 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25879 Level < AfterLegalizeVectorOps &&
25880 (N1.isUndef() ||
25881 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25882 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25883 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25884 return V;
25885 }
25886
25887 // A shuffle of a concat of the same narrow vector can be reduced to use
25888 // only low-half elements of a concat with undef:
25889 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
25890 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25891 N0.getNumOperands() == 2 &&
25892 N0.getOperand(0) == N0.getOperand(1)) {
25893 int HalfNumElts = (int)NumElts / 2;
25894 SmallVector<int, 8> NewMask;
25895 for (unsigned i = 0; i != NumElts; ++i) {
25896 int Idx = SVN->getMaskElt(i);
25897 if (Idx >= HalfNumElts) {
25898 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25899 Idx -= HalfNumElts;
25900 }
25901 NewMask.push_back(Idx);
25902 }
25903 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25904 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25905 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25906 N0.getOperand(0), UndefVec);
25907 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25908 }
25909 }
25910
25911 // See if we can replace a shuffle with an insert_subvector.
25912 // e.g. v2i32 into v8i32:
25913 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25914 // --> insert_subvector(lhs,rhs1,4).
25915 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25916 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25917 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25918 // Ensure RHS subvectors are legal.
25919 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25920 EVT SubVT = RHS.getOperand(0).getValueType();
25921 int NumSubVecs = RHS.getNumOperands();
25922 int NumSubElts = SubVT.getVectorNumElements();
25923 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25924 if (!TLI.isTypeLegal(SubVT))
25925 return SDValue();
25926
25927 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25928 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25929 return SDValue();
25930
25931 // Search [NumSubElts] spans for RHS sequence.
25932 // TODO: Can we avoid nested loops to increase performance?
25933 SmallVector<int> InsertionMask(NumElts);
25934 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25935 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25936 // Reset mask to identity.
25937 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25938
25939 // Add subvector insertion.
25940 std::iota(InsertionMask.begin() + SubIdx,
25941 InsertionMask.begin() + SubIdx + NumSubElts,
25942 NumElts + (SubVec * NumSubElts));
25943
25944 // See if the shuffle mask matches the reference insertion mask.
25945 bool MatchingShuffle = true;
25946 for (int i = 0; i != (int)NumElts; ++i) {
25947 int ExpectIdx = InsertionMask[i];
25948 int ActualIdx = Mask[i];
25949 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25950 MatchingShuffle = false;
25951 break;
25952 }
25953 }
25954
25955 if (MatchingShuffle)
25956 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25957 RHS.getOperand(SubVec),
25958 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25959 }
25960 }
25961 return SDValue();
25962 };
25963 ArrayRef<int> Mask = SVN->getMask();
25964 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25965 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25966 return InsertN1;
25967 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25968 SmallVector<int> CommuteMask(Mask);
25969 ShuffleVectorSDNode::commuteMask(CommuteMask);
25970 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25971 return InsertN0;
25972 }
25973 }
25974
25975 // If we're not performing a select/blend shuffle, see if we can convert the
25976 // shuffle into an AND node, with all the out-of-lane elements known to be zero.
25977 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25978 bool IsInLaneMask = true;
25979 ArrayRef<int> Mask = SVN->getMask();
25980 SmallVector<int, 16> ClearMask(NumElts, -1);
25981 APInt DemandedLHS = APInt::getZero(NumElts);
25982 APInt DemandedRHS = APInt::getZero(NumElts);
25983 for (int I = 0; I != (int)NumElts; ++I) {
25984 int M = Mask[I];
25985 if (M < 0)
25986 continue;
25987 ClearMask[I] = M == I ? I : (I + NumElts);
25988 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25989 if (M != I) {
25990 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25991 Demanded.setBit(M % NumElts);
25992 }
25993 }
25994 // TODO: Should we try to mask with N1 as well?
25995 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25996 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25997 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25998 SDLoc DL(N);
25999 EVT IntVT = VT.changeVectorElementTypeToInteger();
26000 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
26001 // Transform the type to a legal type so that the buildvector constant
26002 // elements are not illegal. Make sure that the result is larger than the
26003 // original type, in case the value is split into two (e.g. i64->i32).
26004 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
26005 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
26006 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
26007 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
26008 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
26009 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
26010 for (int I = 0; I != (int)NumElts; ++I)
26011 if (0 <= Mask[I])
26012 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
26013
26014 // See if a clear mask is legal instead of going via
26015 // XformToShuffleWithZero which loses UNDEF mask elements.
26016 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
26017 return DAG.getBitcast(
26018 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
26019 DAG.getConstant(0, DL, IntVT), ClearMask));
26020
26021 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
26022 return DAG.getBitcast(
26023 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
26024 DAG.getBuildVector(IntVT, DL, AndMask)));
26025 }
26026 }
26027 }
26028
26029 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
26030 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
26031 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
26032 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
26033 return Res;
26034
26035 // If this shuffle only has a single input that is a bitcasted shuffle,
26036 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
26037 // back to their original types.
26038 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
26039 N1.isUndef() && Level < AfterLegalizeVectorOps &&
26040 TLI.isTypeLegal(VT)) {
26041
26042 SDValue BC0 = peekThroughOneUseBitcasts(N0);
26043 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
26044 EVT SVT = VT.getScalarType();
26045 EVT InnerVT = BC0->getValueType(0);
26046 EVT InnerSVT = InnerVT.getScalarType();
26047
26048 // Determine which shuffle works with the smaller scalar type.
26049 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
26050 EVT ScaleSVT = ScaleVT.getScalarType();
26051
26052 if (TLI.isTypeLegal(ScaleVT) &&
26053 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
26054 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
26055 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26056 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
26057
26058 // Scale the shuffle masks to the smaller scalar type.
26059 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
26060 SmallVector<int, 8> InnerMask;
26061 SmallVector<int, 8> OuterMask;
26062 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
26063 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
26064
26065 // Merge the shuffle masks.
26066 SmallVector<int, 8> NewMask;
26067 for (int M : OuterMask)
26068 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
26069
26070 // Test for shuffle mask legality over both commutations.
26071 SDValue SV0 = BC0->getOperand(0);
26072 SDValue SV1 = BC0->getOperand(1);
26073 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26074 if (!LegalMask) {
26075 std::swap(SV0, SV1);
26076 ShuffleVectorSDNode::commuteMask(NewMask);
26077 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
26078 }
26079
26080 if (LegalMask) {
26081 SV0 = DAG.getBitcast(ScaleVT, SV0);
26082 SV1 = DAG.getBitcast(ScaleVT, SV1);
26083 return DAG.getBitcast(
26084 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
26085 }
26086 }
26087 }
26088 }
26089
26090 // Match shuffles of bitcasts, so long as the mask can be treated as the
26091 // larger type.
26092 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26093 return V;
26094
26095 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26096 // operand, and SV1 as the second operand.
26097 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26098 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26099 auto MergeInnerShuffle =
26100 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26101 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26102 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26103 SmallVectorImpl<int> &Mask) -> bool {
26104 // Don't try to fold splats; they're likely to simplify somehow, or they
26105 // might be free.
26106 if (OtherSVN->isSplat())
26107 return false;
26108
26109 SV0 = SV1 = SDValue();
26110 Mask.clear();
26111
26112 for (unsigned i = 0; i != NumElts; ++i) {
26113 int Idx = SVN->getMaskElt(i);
26114 if (Idx < 0) {
26115 // Propagate Undef.
26116 Mask.push_back(Idx);
26117 continue;
26118 }
26119
26120 if (Commute)
26121 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26122
26123 SDValue CurrentVec;
26124 if (Idx < (int)NumElts) {
26125 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26126 // shuffle mask to identify which vector is actually referenced.
26127 Idx = OtherSVN->getMaskElt(Idx);
26128 if (Idx < 0) {
26129 // Propagate Undef.
26130 Mask.push_back(Idx);
26131 continue;
26132 }
26133 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26134 : OtherSVN->getOperand(1);
26135 } else {
26136 // This shuffle index references an element within N1.
26137 CurrentVec = N1;
26138 }
26139
26140 // Simple case where 'CurrentVec' is UNDEF.
26141 if (CurrentVec.isUndef()) {
26142 Mask.push_back(-1);
26143 continue;
26144 }
26145
26146 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26147 // will be the first or second operand of the combined shuffle.
26148 Idx = Idx % NumElts;
26149 if (!SV0.getNode() || SV0 == CurrentVec) {
26150 // Ok. CurrentVec is the left hand side.
26151 // Update the mask accordingly.
26152 SV0 = CurrentVec;
26153 Mask.push_back(Idx);
26154 continue;
26155 }
26156 if (!SV1.getNode() || SV1 == CurrentVec) {
26157 // Ok. CurrentVec is the right hand side.
26158 // Update the mask accordingly.
26159 SV1 = CurrentVec;
26160 Mask.push_back(Idx + NumElts);
26161 continue;
26162 }
26163
26164 // Last chance - see if the vector is another shuffle and if it
26165 // uses one of the existing candidate shuffle ops.
26166 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26167 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26168 if (InnerIdx < 0) {
26169 Mask.push_back(-1);
26170 continue;
26171 }
26172 SDValue InnerVec = (InnerIdx < (int)NumElts)
26173 ? CurrentSVN->getOperand(0)
26174 : CurrentSVN->getOperand(1);
26175 if (InnerVec.isUndef()) {
26176 Mask.push_back(-1);
26177 continue;
26178 }
26179 InnerIdx %= NumElts;
26180 if (InnerVec == SV0) {
26181 Mask.push_back(InnerIdx);
26182 continue;
26183 }
26184 if (InnerVec == SV1) {
26185 Mask.push_back(InnerIdx + NumElts);
26186 continue;
26187 }
26188 }
26189
26190 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26191 return false;
26192 }
26193
26194 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26195 return true;
26196
26197 // Avoid introducing shuffles with illegal mask.
26198 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26199 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26200 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26201 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26202 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26203 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26204 if (TLI.isShuffleMaskLegal(Mask, VT))
26205 return true;
26206
26207 std::swap(SV0, SV1);
26208 ShuffleVectorSDNode::commuteMask(Mask);
26209 return TLI.isShuffleMaskLegal(Mask, VT);
26210 };
26211
26212 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26213 // Canonicalize shuffles according to rules:
26214 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26215 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26216 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26217 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26218 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26219 // The incoming shuffle must be of the same type as the result of the
26220 // current shuffle.
26221 assert(N1->getOperand(0).getValueType() == VT &&
26222 "Shuffle types don't match");
26223
26224 SDValue SV0 = N1->getOperand(0);
26225 SDValue SV1 = N1->getOperand(1);
26226 bool HasSameOp0 = N0 == SV0;
26227 bool IsSV1Undef = SV1.isUndef();
26228 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26229 // Commute the operands of this shuffle so merging below will trigger.
26230 return DAG.getCommutedVectorShuffle(*SVN);
26231 }
26232
26233 // Canonicalize splat shuffles to the RHS to improve merging below.
26234 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26235 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26236 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26237 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26238 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26239 return DAG.getCommutedVectorShuffle(*SVN);
26240 }
26241
26242 // Try to fold according to rules:
26243 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26244 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26245 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26246 // Don't try to fold shuffles with illegal type.
26247 // Only fold if this shuffle is the only user of the other shuffle.
26248 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26249 for (int i = 0; i != 2; ++i) {
26250 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26251 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26252 // The incoming shuffle must be of the same type as the result of the
26253 // current shuffle.
26254 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26255 assert(OtherSV->getOperand(0).getValueType() == VT &&
26256 "Shuffle types don't match");
26257
26258 SDValue SV0, SV1;
26259 SmallVector<int, 4> Mask;
26260 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26261 SV0, SV1, Mask)) {
26262 // Check if all indices in Mask are Undef. In case, propagate Undef.
26263 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26264 return DAG.getUNDEF(VT);
26265
26266 return DAG.getVectorShuffle(VT, SDLoc(N),
26267 SV0 ? SV0 : DAG.getUNDEF(VT),
26268 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26269 }
26270 }
26271 }
26272
26273 // Merge shuffles through binops if we are able to merge them with at least
26274 // one other shuffle.
26275 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26276 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26277 unsigned SrcOpcode = N0.getOpcode();
26278 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26279 (N1.isUndef() ||
26280 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26281 // Get binop source ops, or just pass on the undef.
26282 SDValue Op00 = N0.getOperand(0);
26283 SDValue Op01 = N0.getOperand(1);
26284 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26285 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26286 // TODO: We might be able to relax the VT check but we don't currently
26287 // have any isBinOp() that has different result/ops VTs so play safe until
26288 // we have test coverage.
26289 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26290 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26291 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26292 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26293 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26294 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26295 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26296 SmallVectorImpl<int> &Mask, bool LeftOp,
26297 bool Commute) {
26298 SDValue InnerN = Commute ? N1 : N0;
26299 SDValue Op0 = LeftOp ? Op00 : Op01;
26300 SDValue Op1 = LeftOp ? Op10 : Op11;
26301 if (Commute)
26302 std::swap(Op0, Op1);
26303 // Only accept the merged shuffle if we don't introduce undef elements,
26304 // or the inner shuffle already contained undef elements.
26305 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26306 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26307 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26308 Mask) &&
26309 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26310 llvm::none_of(Mask, [](int M) { return M < 0; }));
26311 };
26312
26313 // Ensure we don't increase the number of shuffles - we must merge a
26314 // shuffle from at least one of the LHS and RHS ops.
26315 bool MergedLeft = false;
26316 SDValue LeftSV0, LeftSV1;
26317 SmallVector<int, 4> LeftMask;
26318 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26319 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26320 MergedLeft = true;
26321 } else {
26322 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26323 LeftSV0 = Op00, LeftSV1 = Op10;
26324 }
26325
26326 bool MergedRight = false;
26327 SDValue RightSV0, RightSV1;
26328 SmallVector<int, 4> RightMask;
26329 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26330 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26331 MergedRight = true;
26332 } else {
26333 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26334 RightSV0 = Op01, RightSV1 = Op11;
26335 }
26336
26337 if (MergedLeft || MergedRight) {
26338 SDLoc DL(N);
26339 SDValue LHS = DAG.getVectorShuffle(
26340 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26341 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26342 SDValue RHS = DAG.getVectorShuffle(
26343 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26344 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26345 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26346 }
26347 }
26348 }
26349 }
26350
26351 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26352 return V;
26353
26354 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26355 // Perform this really late, because it could eliminate knowledge
26356 // of undef elements created by this shuffle.
26357 if (Level < AfterLegalizeTypes)
26358 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26359 LegalOperations))
26360 return V;
26361
26362 return SDValue();
26363}
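// Illustrative sketch (plain C++, no SDValue/LLVM types): the core of the
// MergeInnerShuffle lambda above. Sources are named 0 = A and 1 = B (operands
// of the inner shuffle) and 2 = C (the other operand of the outer shuffle).
// The fold succeeds only if, after peeking through the inner mask, at most two
// distinct sources remain; the real combine additionally peeks through a
// second shuffle operand and checks mask legality.
#include <vector>

static bool mergeShuffleOfShuffle(const std::vector<int> &OuterMask,
                                  const std::vector<int> &InnerMask,
                                  int &Src0, int &Src1,
                                  std::vector<int> &Merged) {
  int NumElts = (int)OuterMask.size();
  Src0 = Src1 = -1; // Source slots not yet assigned.
  Merged.clear();
  for (int Idx : OuterMask) {
    if (Idx < 0) {
      Merged.push_back(-1); // Propagate undef.
      continue;
    }
    int Source, Elt;
    if (Idx < NumElts) {
      // Lane reads the inner shuffle: peek through its mask.
      int Inner = InnerMask[Idx];
      if (Inner < 0) {
        Merged.push_back(-1);
        continue;
      }
      Source = Inner < NumElts ? 0 : 1; // A or B.
      Elt = Inner % NumElts;
    } else {
      Source = 2; // C.
      Elt = Idx - NumElts;
    }
    if (Src0 == -1 || Src0 == Source)
      Src0 = Source;
    else if (Src1 == -1 || Src1 == Source)
      Src1 = Source;
    else
      return false; // Three distinct sources: not expressible as one shuffle.
    Merged.push_back(Source == Src0 ? Elt : Elt + NumElts);
  }
  return true;
}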
26364
26365SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26366 EVT VT = N->getValueType(0);
26367 if (!VT.isFixedLengthVector())
26368 return SDValue();
26369
26370 // Try to convert a scalar binop with an extracted vector element to a vector
26371 // binop. This is intended to reduce potentially expensive register moves.
26372 // TODO: Check if both operands are extracted.
26373 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26374 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26375 SDValue Scalar = N->getOperand(0);
26376 unsigned Opcode = Scalar.getOpcode();
26377 EVT VecEltVT = VT.getScalarType();
26378 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26379 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26380 Scalar.getOperand(0).getValueType() == VecEltVT &&
26381 Scalar.getOperand(1).getValueType() == VecEltVT &&
26382 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26383 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26384 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26385 // Match an extract element and get a shuffle mask equivalent.
26386 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26387
26388 for (int i : {0, 1}) {
26389 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26390 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26391 SDValue EE = Scalar.getOperand(i);
26392 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26393 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26394 EE.getOperand(0).getValueType() == VT &&
26395 isa<ConstantSDNode>(EE.getOperand(1))) {
26396 // Mask = {ExtractIndex, undef, undef....}
26397 ShufMask[0] = EE.getConstantOperandVal(1);
26398 // Make sure the shuffle is legal if we are crossing lanes.
26399 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26400 SDLoc DL(N);
26401 SDValue V[] = {EE.getOperand(0),
26402 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26403 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26404 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26405 ShufMask);
26406 }
26407 }
26408 }
26409 }
26410
26411 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26412 // with a VECTOR_SHUFFLE and possible truncate.
26413 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26414 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26415 return SDValue();
26416
26417 // If we have an implicit truncate, truncate here if it is legal.
26418 if (VecEltVT != Scalar.getValueType() &&
26419 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26420 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26421 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26422 }
26423
26424 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26425 if (!ExtIndexC)
26426 return SDValue();
26427
26428 SDValue SrcVec = Scalar.getOperand(0);
26429 EVT SrcVT = SrcVec.getValueType();
26430 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26431 unsigned VTNumElts = VT.getVectorNumElements();
26432 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26433 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26434 SmallVector<int, 8> Mask(SrcNumElts, -1);
26435 Mask[0] = ExtIndexC->getZExtValue();
26436 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26437 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26438 if (!LegalShuffle)
26439 return SDValue();
26440
26441 // If the initial vector is the same size, the shuffle is the result.
26442 if (VT == SrcVT)
26443 return LegalShuffle;
26444
26445 // If not, shorten the shuffled vector.
26446 if (VTNumElts != SrcNumElts) {
26447 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26448 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26449 SrcVT.getVectorElementType(), VTNumElts);
26450 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26451 ZeroIdx);
26452 }
26453 }
26454
26455 return SDValue();
26456}
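// Illustrative sketch (plain C++): the mask built for the
// scalar_to_vector(extract_vector_elt(V, C0)) rewrite above - element C0 of
// the source lands in lane 0 and every other lane is undef, e.g. C0 == 3 on a
// 4-element source gives <3,-1,-1,-1>.
#include <vector>

static std::vector<int> scalarToVectorAsShuffleMask(unsigned SrcNumElts,
                                                    unsigned ExtIndex) {
  std::vector<int> Mask(SrcNumElts, -1); // All lanes undef...
  Mask[0] = (int)ExtIndex;               // ...except lane 0, which reads ExtIndex.
  return Mask;
}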
26457
26458SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26459 EVT VT = N->getValueType(0);
26460 SDValue N0 = N->getOperand(0);
26461 SDValue N1 = N->getOperand(1);
26462 SDValue N2 = N->getOperand(2);
26463 uint64_t InsIdx = N->getConstantOperandVal(2);
26464
26465 // If inserting an UNDEF, just return the original vector.
26466 if (N1.isUndef())
26467 return N0;
26468
26469 // If this is an insert of an extracted vector into an undef vector, we can
26470 // just use the input to the extract if the types match, and can simplify
26471 // in some cases even if they don't.
26472 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26473 N1.getOperand(1) == N2) {
26474 EVT SrcVT = N1.getOperand(0).getValueType();
26475 if (SrcVT == VT)
26476 return N1.getOperand(0);
26477 // TODO: To remove the zero check, need to adjust the offset to
26478 // a multiple of the new src type.
26479 if (isNullConstant(N2)) {
26480 if (VT.knownBitsGE(SrcVT) &&
26481 !(VT.isFixedLengthVector() && SrcVT.isScalableVector()))
26482 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26483 VT, N0, N1.getOperand(0), N2);
26484 else if (VT.knownBitsLE(SrcVT) &&
26485 !(VT.isScalableVector() && SrcVT.isFixedLengthVector()))
26486 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26487 VT, N1.getOperand(0), N2);
26488 }
26489 }
26490
26491 // Handle case where we've ended up inserting back into the source vector
26492 // we extracted the subvector from.
26493 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26494 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26495 N1.getOperand(1) == N2)
26496 return N0;
26497
26498 // Simplify scalar inserts into an undef vector:
26499 // insert_subvector undef, (splat X), N2 -> splat X
26500 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26501 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26502 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26503
26504 // If we are inserting a bitcast value into an undef, with the same
26505 // number of elements, just use the bitcast input of the extract.
26506 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26507 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26508 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26509 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26510 N1.getOperand(0).getOperand(1) == N2 &&
26511 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26512 VT.getVectorElementCount() &&
26513 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26514 VT.getSizeInBits()) {
26515 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26516 }
26517
26518 // If both N0 and N1 are bitcast values on which insert_subvector
26519 // would make sense, pull the bitcast through.
26520 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26521 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26522 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26523 SDValue CN0 = N0.getOperand(0);
26524 SDValue CN1 = N1.getOperand(0);
26525 EVT CN0VT = CN0.getValueType();
26526 EVT CN1VT = CN1.getValueType();
26527 if (CN0VT.isVector() && CN1VT.isVector() &&
26529 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26530 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26531 CN0.getValueType(), CN0, CN1, N2);
26532 return DAG.getBitcast(VT, NewINSERT);
26533 }
26534 }
26535
26536 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26537 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26538 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26539 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26540 N0.getOperand(1).getValueType() == N1.getValueType() &&
26541 N0.getOperand(2) == N2)
26542 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26543 N1, N2);
26544
26545 // Eliminate an intermediate insert into an undef vector:
26546 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26547 // insert_subvector undef, X, 0
26548 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26549 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26550 isNullConstant(N2))
26551 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26552 N1.getOperand(1), N2);
26553
26554 // Push subvector bitcasts to the output, adjusting the index as we go.
26555 // insert_subvector(bitcast(v), bitcast(s), c1)
26556 // -> bitcast(insert_subvector(v, s, c2))
26557 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26558 N1.getOpcode() == ISD::BITCAST) {
26559 SDValue N0Src = peekThroughBitcasts(N0);
26560 SDValue N1Src = peekThroughBitcasts(N1);
26561 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26562 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26563 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26564 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26565 EVT NewVT;
26566 SDLoc DL(N);
26567 SDValue NewIdx;
26568 LLVMContext &Ctx = *DAG.getContext();
26569 ElementCount NumElts = VT.getVectorElementCount();
26570 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26571 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26572 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26573 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26574 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26575 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26576 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26577 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26578 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26579 NumElts.divideCoefficientBy(Scale));
26580 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26581 }
26582 }
26583 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26584 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26585 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26586 return DAG.getBitcast(VT, Res);
26587 }
26588 }
26589 }
26590
26591 // Canonicalize insert_subvector dag nodes.
26592 // Example:
26593 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26594 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26595 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26596 N1.getValueType() == N0.getOperand(1).getValueType()) {
26597 unsigned OtherIdx = N0.getConstantOperandVal(2);
26598 if (InsIdx < OtherIdx) {
26599 // Swap nodes.
26600 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26601 N0.getOperand(0), N1, N2);
26602 AddToWorklist(NewOp.getNode());
26603 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26604 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26605 }
26606 }
26607
26608 // If the input vector is a concatenation, and the insert replaces
26609 // one of the pieces, we can optimize into a single concat_vectors.
26610 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26611 N0.getOperand(0).getValueType() == N1.getValueType() &&
26614 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26615 SmallVector<SDValue, 8> Ops(N0->ops());
26616 Ops[InsIdx / Factor] = N1;
26617 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26618 }
26619
26620 // Simplify source operands based on insertion.
26621 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26622 return SDValue(N, 0);
26623
26624 return SDValue();
26625}
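// Illustrative sketch (plain C++): the index rescaling used when pushing the
// bitcasts above through an insert_subvector. Inserting at index 1 while
// splitting i64 lanes into i32 lanes (scale 2) becomes index 2, while going
// the other way only works when the index falls on a wide-element boundary.
static bool rescaleInsertIndex(unsigned OldEltBits, unsigned NewEltBits,
                               unsigned InsIdx, unsigned &NewIdx) {
  if (OldEltBits % NewEltBits == 0) {
    NewIdx = InsIdx * (OldEltBits / NewEltBits); // Narrower lanes: multiply.
    return true;
  }
  if (NewEltBits % OldEltBits == 0) {
    unsigned Scale = NewEltBits / OldEltBits;
    if (InsIdx % Scale != 0)
      return false; // Insert point must align with a wide element.
    NewIdx = InsIdx / Scale; // Wider lanes: divide.
    return true;
  }
  return false; // Element sizes are not multiples of each other.
}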
26626
26627SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26628 SDValue N0 = N->getOperand(0);
26629
26630 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26631 if (N0->getOpcode() == ISD::FP16_TO_FP)
26632 return N0->getOperand(0);
26633
26634 return SDValue();
26635}
26636
26637SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26638 auto Op = N->getOpcode();
26640 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26641 SDValue N0 = N->getOperand(0);
26642
26643 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26644 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26645 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26646 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26647 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26648 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26649 }
26650 }
26651
26652 // Sometimes constants manage to survive very late in the pipeline, e.g.,
26653 // because they are wrapped inside the <1 x f16> type. Try one last time to
26654 // get rid of them.
26655 SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
26656 N->getValueType(0), {N0});
26657 return Folded;
26658}
26659
26660SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26661 SDValue N0 = N->getOperand(0);
26662
26663 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26664 if (N0->getOpcode() == ISD::BF16_TO_FP)
26665 return N0->getOperand(0);
26666
26667 return SDValue();
26668}
26669
26670SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26671 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26672 return visitFP16_TO_FP(N);
26673}
26674
26675SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26676 SDValue N0 = N->getOperand(0);
26677 EVT VT = N0.getValueType();
26678 unsigned Opcode = N->getOpcode();
26679
26680 // VECREDUCE over 1-element vector is just an extract.
26681 if (VT.getVectorElementCount().isScalar()) {
26682 SDLoc dl(N);
26683 SDValue Res =
26684 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26685 DAG.getVectorIdxConstant(0, dl));
26686 if (Res.getValueType() != N->getValueType(0))
26687 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26688 return Res;
26689 }
26690
26691 // On a boolean vector an and/or reduction is the same as a umin/umax
26692 // reduction. Convert them if the latter is legal while the former isn't.
26693 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26694 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26695 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26696 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26697 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26698 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26699 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26700 }
26701
26702 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26703 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26704 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26705 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26706 SDValue Vec = N0.getOperand(0);
26707 SDValue Subvec = N0.getOperand(1);
26708 if ((Opcode == ISD::VECREDUCE_OR &&
26709 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26710 (Opcode == ISD::VECREDUCE_AND &&
26711 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26712 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26713 }
26714
26715 return SDValue();
26716}
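// Illustrative sketch (plain C++): why an AND/OR reduction over a boolean
// vector can be rewritten as UMIN/UMAX above - when every lane is known to be
// all-zeros or all-ones, AND picks the smallest unsigned value and OR picks
// the largest.
#include <algorithm>
#include <cstdint>
#include <vector>

static uint8_t andReduce(const std::vector<uint8_t> &BoolLanes) {
  uint8_t R = 0xFF; // All-ones identity; lanes are 0x00 or 0xFF.
  for (uint8_t L : BoolLanes)
    R = std::min(R, L); // Same result as R &= L for 0x00/0xFF lanes.
  return R;
}

static uint8_t orReduce(const std::vector<uint8_t> &BoolLanes) {
  uint8_t R = 0x00; // All-zeros identity.
  for (uint8_t L : BoolLanes)
    R = std::max(R, L); // Same result as R |= L for 0x00/0xFF lanes.
  return R;
}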
26717
26718SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26719 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26720
26721 // FSUB -> FMA combines:
26722 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26723 AddToWorklist(Fused.getNode());
26724 return Fused;
26725 }
26726 return SDValue();
26727}
26728
26729SDValue DAGCombiner::visitVPOp(SDNode *N) {
26730
26731 if (N->getOpcode() == ISD::VP_GATHER)
26732 if (SDValue SD = visitVPGATHER(N))
26733 return SD;
26734
26735 if (N->getOpcode() == ISD::VP_SCATTER)
26736 if (SDValue SD = visitVPSCATTER(N))
26737 return SD;
26738
26739 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26740 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26741 return SD;
26742
26743 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26744 if (SDValue SD = visitVP_STRIDED_STORE(N))
26745 return SD;
26746
26747 // VP operations in which all vector elements are disabled - either by
26748 // determining that the mask is all false or that the EVL is 0 - can be
26749 // eliminated.
26750 bool AreAllEltsDisabled = false;
26751 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26752 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26753 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26754 AreAllEltsDisabled |=
26755 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26756
26757 // This is the only generic VP combine we support for now.
26758 if (!AreAllEltsDisabled) {
26759 switch (N->getOpcode()) {
26760 case ISD::VP_FADD:
26761 return visitVP_FADD(N);
26762 case ISD::VP_FSUB:
26763 return visitVP_FSUB(N);
26764 case ISD::VP_FMA:
26765 return visitFMA<VPMatchContext>(N);
26766 case ISD::VP_SELECT:
26767 return visitVP_SELECT(N);
26768 case ISD::VP_MUL:
26769 return visitMUL<VPMatchContext>(N);
26770 default:
26771 break;
26772 }
26773 return SDValue();
26774 }
26775
26776 // Binary operations can be replaced by UNDEF.
26777 if (ISD::isVPBinaryOp(N->getOpcode()))
26778 return DAG.getUNDEF(N->getValueType(0));
26779
26780 // VP Memory operations can be replaced by either the chain (stores) or the
26781 // chain + undef (loads).
26782 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26783 if (MemSD->writeMem())
26784 return MemSD->getChain();
26785 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26786 }
26787
26788 // Reduction operations return the start operand when no elements are active.
26789 if (ISD::isVPReduction(N->getOpcode()))
26790 return N->getOperand(0);
26791
26792 return SDValue();
26793}
26794
26795SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26796 SDValue Chain = N->getOperand(0);
26797 SDValue Ptr = N->getOperand(1);
26798 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26799
26800 // Check if the memory where the FP state is written is used only in a single
26801 // load operation.
26802 LoadSDNode *LdNode = nullptr;
26803 for (auto *U : Ptr->uses()) {
26804 if (U == N)
26805 continue;
26806 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26807 if (LdNode && LdNode != Ld)
26808 return SDValue();
26809 LdNode = Ld;
26810 continue;
26811 }
26812 return SDValue();
26813 }
26814 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26815 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26817 return SDValue();
26818
26819 // Check if the loaded value is used only in a store operation.
26820 StoreSDNode *StNode = nullptr;
26821 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26822 SDUse &U = I.getUse();
26823 if (U.getResNo() == 0) {
26824 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26825 if (StNode)
26826 return SDValue();
26827 StNode = St;
26828 } else {
26829 return SDValue();
26830 }
26831 }
26832 }
26833 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26834 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26835 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26836 return SDValue();
26837
26838 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26839 // environment.
26840 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26841 StNode->getMemOperand());
26842 CombineTo(StNode, Res, false);
26843 return Res;
26844}
26845
26846SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26847 SDValue Chain = N->getOperand(0);
26848 SDValue Ptr = N->getOperand(1);
26849 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26850
26851 // Check if the address of the FP state is also used only in a store operation.
26852 StoreSDNode *StNode = nullptr;
26853 for (auto *U : Ptr->uses()) {
26854 if (U == N)
26855 continue;
26856 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26857 if (StNode && StNode != St)
26858 return SDValue();
26859 StNode = St;
26860 continue;
26861 }
26862 return SDValue();
26863 }
26864 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26865 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26866 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26867 return SDValue();
26868
26869 // Check if the stored value is loaded from some location and the loaded
26870 // value is used only in the store operation.
26871 SDValue StValue = StNode->getValue();
26872 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26873 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26874 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26875 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26876 return SDValue();
26877
26878 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26879 // environment.
26880 SDValue Res =
26881 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26882 LdNode->getMemOperand());
26883 return Res;
26884}
26885
26886 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26887/// with the destination vector and a zero vector.
26888/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26889/// vector_shuffle V, Zero, <0, 4, 2, 4>
26890SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26891 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26892
26893 EVT VT = N->getValueType(0);
26894 SDValue LHS = N->getOperand(0);
26895 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26896 SDLoc DL(N);
26897
26898 // Make sure we're not running after operation legalization where it
26899 // may have custom lowered the vector shuffles.
26900 if (LegalOperations)
26901 return SDValue();
26902
26903 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26904 return SDValue();
26905
26906 EVT RVT = RHS.getValueType();
26907 unsigned NumElts = RHS.getNumOperands();
26908
26909 // Attempt to create a valid clear mask, splitting the mask into
26910 // sub elements and checking to see if each is
26911 // all zeros or all ones - suitable for shuffle masking.
26912 auto BuildClearMask = [&](int Split) {
26913 int NumSubElts = NumElts * Split;
26914 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26915
26916 SmallVector<int, 8> Indices;
26917 for (int i = 0; i != NumSubElts; ++i) {
26918 int EltIdx = i / Split;
26919 int SubIdx = i % Split;
26920 SDValue Elt = RHS.getOperand(EltIdx);
26921 // X & undef --> 0 (not undef). So this lane must be converted to choose
26922 // from the zero constant vector (same as if the element had all 0-bits).
26923 if (Elt.isUndef()) {
26924 Indices.push_back(i + NumSubElts);
26925 continue;
26926 }
26927
26928 APInt Bits;
26929 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26930 Bits = Cst->getAPIntValue();
26931 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26932 Bits = CstFP->getValueAPF().bitcastToAPInt();
26933 else
26934 return SDValue();
26935
26936 // Extract the sub element from the constant bit mask.
26937 if (DAG.getDataLayout().isBigEndian())
26938 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26939 else
26940 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26941
26942 if (Bits.isAllOnes())
26943 Indices.push_back(i);
26944 else if (Bits == 0)
26945 Indices.push_back(i + NumSubElts);
26946 else
26947 return SDValue();
26948 }
26949
26950 // Let's see if the target supports this vector_shuffle.
26951 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26952 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26953 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26954 return SDValue();
26955
26956 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26957 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26958 DAG.getBitcast(ClearVT, LHS),
26959 Zero, Indices));
26960 };
26961
26962 // Determine maximum split level (byte level masking).
26963 int MaxSplit = 1;
26964 if (RVT.getScalarSizeInBits() % 8 == 0)
26965 MaxSplit = RVT.getScalarSizeInBits() / 8;
26966
26967 for (int Split = 1; Split <= MaxSplit; ++Split)
26968 if (RVT.getScalarSizeInBits() % Split == 0)
26969 if (SDValue S = BuildClearMask(Split))
26970 return S;
26971
26972 return SDValue();
26973}
26974
26975/// If a vector binop is performed on splat values, it may be profitable to
26976/// extract, scalarize, and insert/splat.
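/// For example, add (splat X), (splat Y) --> splat (add X, Y), as long as the
/// scalar operation is legal and extracting the splatted element is cheap.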
26977 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26978 const SDLoc &DL) {
26979 SDValue N0 = N->getOperand(0);
26980 SDValue N1 = N->getOperand(1);
26981 unsigned Opcode = N->getOpcode();
26982 EVT VT = N->getValueType(0);
26983 EVT EltVT = VT.getVectorElementType();
26984 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26985
26986 // TODO: Remove/replace the extract cost check? If the elements are available
26987 // as scalars, then there may be no extract cost. Should we ask if
26988 // inserting a scalar back into a vector is cheap instead?
26989 int Index0, Index1;
26990 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26991 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26992 // Extract element from splat_vector should be free.
26993 // TODO: use DAG.isSplatValue instead?
26994 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26995 N1.getOpcode() == ISD::SPLAT_VECTOR;
26996 if (!Src0 || !Src1 || Index0 != Index1 ||
26997 Src0.getValueType().getVectorElementType() != EltVT ||
26998 Src1.getValueType().getVectorElementType() != EltVT ||
26999 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
27000 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
27001 return SDValue();
27002
27003 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27004 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
27005 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
27006 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
27007
27008 // If all lanes but 1 are undefined, no need to splat the scalar result.
27009 // TODO: Keep track of undefs and use that info in the general case.
27010 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
27011 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
27012 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
27013 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
27014 // build_vec ..undef, (bo X, Y), undef...
27015 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
27016 Ops[Index0] = ScalarBO;
27017 return DAG.getBuildVector(VT, DL, Ops);
27018 }
27019
27020 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
27021 return DAG.getSplat(VT, DL, ScalarBO);
27022}
27023
27024/// Visit a vector cast operation, like FP_EXTEND.
27025SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
27026 EVT VT = N->getValueType(0);
27027 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
27028 EVT EltVT = VT.getVectorElementType();
27029 unsigned Opcode = N->getOpcode();
27030
27031 SDValue N0 = N->getOperand(0);
27032 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
27033
27034 // TODO: promote operation might be also good here?
27035 int Index0;
27036 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
27037 if (Src0 &&
27038 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
27039 TLI.isExtractVecEltCheap(VT, Index0)) &&
27040 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
27041 TLI.preferScalarizeSplat(N)) {
27042 EVT SrcVT = N0.getValueType();
27043 EVT SrcEltVT = SrcVT.getVectorElementType();
27044 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
27045 SDValue Elt =
27046 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
27047 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
27048 if (VT.isScalableVector())
27049 return DAG.getSplatVector(VT, DL, ScalarBO);
27050 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
27051 return DAG.getBuildVector(VT, DL, Ops);
27052 }
27053
27054 return SDValue();
27055}
27056
27057/// Visit a binary vector operation, like ADD.
27058SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
27059 EVT VT = N->getValueType(0);
27060 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
27061
27062 SDValue LHS = N->getOperand(0);
27063 SDValue RHS = N->getOperand(1);
27064 unsigned Opcode = N->getOpcode();
27065 SDNodeFlags Flags = N->getFlags();
27066
27067 // Move unary shuffles with identical masks after a vector binop:
27068 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
27069 // --> shuffle (VBinOp A, B), Undef, Mask
27070 // This does not require type legality checks because we are creating the
27071 // same types of operations that are in the original sequence. We do have to
27072 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
27073 // though. This code is adapted from the identical transform in instcombine.
27074 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
27075 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
27076 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
27077 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
27078 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
27079 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
27080 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
27081 RHS.getOperand(0), Flags);
27082 SDValue UndefV = LHS.getOperand(1);
27083 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
27084 }
27085
27086 // Try to sink a splat shuffle after a binop with a uniform constant.
27087 // This is limited to cases where neither the shuffle nor the constant have
27088 // undefined elements because that could be poison-unsafe or inhibit
27089 // demanded elements analysis. It is further limited to not change a splat
27090 // of an inserted scalar because that may be optimized better by
27091 // load-folding or other target-specific behaviors.
27092 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
27093 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
27094 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27095 // binop (splat X), (splat C) --> splat (binop X, C)
27096 SDValue X = Shuf0->getOperand(0);
27097 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
27098 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27099 Shuf0->getMask());
27100 }
27101 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27102 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27103 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27104 // binop (splat C), (splat X) --> splat (binop C, X)
27105 SDValue X = Shuf1->getOperand(0);
27106 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27107 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27108 Shuf1->getMask());
27109 }
27110 }
27111
27112 // The following pattern is likely to emerge with vector reduction ops. Moving
27113 // the binary operation ahead of insertion may allow using a narrower vector
27114 // instruction that has better performance than the wide version of the op:
27115 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27116 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27117 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27118 LHS.getOperand(2) == RHS.getOperand(2) &&
27119 (LHS.hasOneUse() || RHS.hasOneUse())) {
27120 SDValue X = LHS.getOperand(1);
27121 SDValue Y = RHS.getOperand(1);
27122 SDValue Z = LHS.getOperand(2);
27123 EVT NarrowVT = X.getValueType();
27124 if (NarrowVT == Y.getValueType() &&
27125 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27126 LegalOperations)) {
27127 // (binop undef, undef) may not return undef, so compute that result.
27128 SDValue VecC =
27129 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27130 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27131 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27132 }
27133 }
27134
27135 // Make sure all but the first op are undef or constant.
27136 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27137 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27138 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27139 return Op.isUndef() ||
27140 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27141 });
27142 };
27143
27144 // The following pattern is likely to emerge with vector reduction ops. Moving
27145 // the binary operation ahead of the concat may allow using a narrower vector
27146 // instruction that has better performance than the wide version of the op:
27147 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27148 // concat (VBinOp X, Y), VecC
27149 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27150 (LHS.hasOneUse() || RHS.hasOneUse())) {
27151 EVT NarrowVT = LHS.getOperand(0).getValueType();
27152 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27153 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27154 unsigned NumOperands = LHS.getNumOperands();
27155 SmallVector<SDValue, 4> ConcatOps;
27156 for (unsigned i = 0; i != NumOperands; ++i) {
27157 // This constant folds for operands 1 and up.
27158 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27159 RHS.getOperand(i)));
27160 }
27161
27162 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27163 }
27164 }
27165
27166 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27167 return V;
27168
27169 return SDValue();
27170}
27171
27172SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27173 SDValue N2) {
27174 assert(N0.getOpcode() == ISD::SETCC &&
27175 "First argument must be a SetCC node!");
27176
27177 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27178 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27179
27180 // If we got a simplified select_cc node back from SimplifySelectCC, then
27181 // break it down into a new SETCC node, and a new SELECT node, and then return
27182 // the SELECT node, since we were called with a SELECT node.
27183 if (SCC.getNode()) {
27184 // Check to see if we got a select_cc back (to turn into setcc/select).
27185 // Otherwise, just return whatever node we got back, like fabs.
27186 if (SCC.getOpcode() == ISD::SELECT_CC) {
27187 const SDNodeFlags Flags = N0->getFlags();
27188 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27189 N0.getValueType(),
27190 SCC.getOperand(0), SCC.getOperand(1),
27191 SCC.getOperand(4), Flags);
27192 AddToWorklist(SETCC.getNode());
27193 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27194 SCC.getOperand(2), SCC.getOperand(3));
27195 SelectNode->setFlags(Flags);
27196 return SelectNode;
27197 }
27198
27199 return SCC;
27200 }
27201 return SDValue();
27202}
27203
27204/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27205/// being selected between, see if we can simplify the select. Callers of this
27206/// should assume that TheSelect is deleted if this returns true. As such, they
27207/// should return the appropriate thing (e.g. the node) back to the top-level of
27208/// the DAG combiner loop to avoid it being looked at.
27209bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27210 SDValue RHS) {
27211 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27212 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27213 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27214 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27215 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27216 SDValue Sqrt = RHS;
27217 ISD::CondCode CC;
27218 SDValue CmpLHS;
27219 const ConstantFPSDNode *Zero = nullptr;
27220
27221 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27222 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27223 CmpLHS = TheSelect->getOperand(0);
27224 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27225 } else {
27226 // SELECT or VSELECT
27227 SDValue Cmp = TheSelect->getOperand(0);
27228 if (Cmp.getOpcode() == ISD::SETCC) {
27229 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27230 CmpLHS = Cmp.getOperand(0);
27231 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27232 }
27233 }
27234 if (Zero && Zero->isZero() &&
27235 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27236 CC == ISD::SETULT || CC == ISD::SETLT)) {
27237 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27238 CombineTo(TheSelect, Sqrt);
27239 return true;
27240 }
27241 }
27242 }
27243 // Cannot simplify select with vector condition
27244 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27245
27246 // If this is a select from two identical things, try to pull the operation
27247 // through the select.
27248 if (LHS.getOpcode() != RHS.getOpcode() ||
27249 !LHS.hasOneUse() || !RHS.hasOneUse())
27250 return false;
27251
27252 // If this is a load and the token chain is identical, replace the select
27253 // of two loads with a load through a select of the address to load from.
27254 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27255 // constants have been dropped into the constant pool.
27256 if (LHS.getOpcode() == ISD::LOAD) {
27257 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27258 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27259
27260 // Token chains must be identical.
27261 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27262 // Do not let this transformation reduce the number of volatile loads.
27263 // Be conservative for atomics for the moment
27264 // TODO: This does appear to be legal for unordered atomics (see D66309)
27265 !LLD->isSimple() || !RLD->isSimple() ||
27266 // FIXME: If either is a pre/post inc/dec load,
27267 // we'd need to split out the address adjustment.
27268 LLD->isIndexed() || RLD->isIndexed() ||
27269 // If this is an EXTLOAD, the VT's must match.
27270 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27271 // If this is an EXTLOAD, the kind of extension must match.
27272 (LLD->getExtensionType() != RLD->getExtensionType() &&
27273 // The only exception is if one of the extensions is anyext.
27274 LLD->getExtensionType() != ISD::EXTLOAD &&
27275 RLD->getExtensionType() != ISD::EXTLOAD) ||
27276 // FIXME: this discards src value information. This is
27277 // over-conservative. It would be beneficial to be able to remember
27278 // both potential memory locations. Since we are discarding
27279 // src value info, don't do the transformation if the memory
27280 // locations are not in the default address space.
27281 LLD->getPointerInfo().getAddrSpace() != 0 ||
27282 RLD->getPointerInfo().getAddrSpace() != 0 ||
27283 // We can't produce a CMOV of a TargetFrameIndex since we won't
27284 // generate the address generation required.
27285 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27286 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27287 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27288 LLD->getBasePtr().getValueType()))
27289 return false;
27290
27291 // The loads must not depend on one another.
27292 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27293 return false;
27294
27295 // Check that the select condition doesn't reach either load. If so,
27296 // folding this will induce a cycle into the DAG. If not, this is safe to
27297 // xform, so create a select of the addresses.
27298
27299 SmallPtrSet<const SDNode *, 32> Visited;
27300 SmallVector<const SDNode *, 16> Worklist;
27301
27302 // Always fail if LLD and RLD are not independent. TheSelect is a
27303 // predecessor to all Nodes in question so we need not search past it.
27304
27305 Visited.insert(TheSelect);
27306 Worklist.push_back(LLD);
27307 Worklist.push_back(RLD);
27308
27309 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27310 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27311 return false;
27312
27313 SDValue Addr;
27314 if (TheSelect->getOpcode() == ISD::SELECT) {
27315 // We cannot do this optimization if any pair of {RLD, LLD} is a
27316 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27317 // Loads, we only need to check if CondNode is a successor to one of the
27318 // loads. We can further avoid this if there's no use of their chain
27319 // value.
27320 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27321 Worklist.push_back(CondNode);
27322
27323 if ((LLD->hasAnyUseOfValue(1) &&
27324 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27325 (RLD->hasAnyUseOfValue(1) &&
27326 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27327 return false;
27328
27329 Addr = DAG.getSelect(SDLoc(TheSelect),
27330 LLD->getBasePtr().getValueType(),
27331 TheSelect->getOperand(0), LLD->getBasePtr(),
27332 RLD->getBasePtr());
27333 } else { // Otherwise SELECT_CC
27334 // We cannot do this optimization if any pair of {RLD, LLD} is a
27335 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27336 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27337 // one of the loads. We can further avoid this if there's no use of their
27338 // chain value.
27339
27340 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27341 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27342 Worklist.push_back(CondLHS);
27343 Worklist.push_back(CondRHS);
27344
27345 if ((LLD->hasAnyUseOfValue(1) &&
27346 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27347 (RLD->hasAnyUseOfValue(1) &&
27348 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27349 return false;
27350
27351 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27352 LLD->getBasePtr().getValueType(),
27353 TheSelect->getOperand(0),
27354 TheSelect->getOperand(1),
27355 LLD->getBasePtr(), RLD->getBasePtr(),
27356 TheSelect->getOperand(4));
27357 }
27358
27359 SDValue Load;
27360 // It is safe to replace the two loads if they have different alignments,
27361 // but the new load must be the minimum (most restrictive) alignment of the
27362 // inputs.
27363 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27364 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27365 if (!RLD->isInvariant())
27366 MMOFlags &= ~MachineMemOperand::MOInvariant;
27367 if (!RLD->isDereferenceable())
27368 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27369 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27370 // FIXME: Discards pointer and AA info.
27371 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27372 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27373 MMOFlags);
27374 } else {
27375 // FIXME: Discards pointer and AA info.
27376 Load = DAG.getExtLoad(
27377 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27378 : LLD->getExtensionType(),
27379 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27380 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27381 }
27382
27383 // Users of the select now use the result of the load.
27384 CombineTo(TheSelect, Load);
27385
27386 // Users of the old loads now use the new load's chain. We know the
27387 // old-load value is dead now.
27388 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27389 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27390 return true;
27391 }
27392
27393 return false;
27394}
27395
27396/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27397/// bitwise 'and'.
27398SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27399 SDValue N1, SDValue N2, SDValue N3,
27400 ISD::CondCode CC) {
27401 // If this is a select where the false operand is zero and the compare is a
27402 // check of the sign bit, see if we can perform the "gzip trick":
27403 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27404 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
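// For example, with i32 operands, select_cc setlt X, 0, A, 0 becomes
// and (sra X, 31), A: the arithmetic shift yields all-ones exactly when X is
// negative and zero otherwise.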
27405 EVT XType = N0.getValueType();
27406 EVT AType = N2.getValueType();
27407 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27408 return SDValue();
27409
27410 // If the comparison is testing for a positive value, we have to invert
27411 // the sign bit mask, so only do that transform if the target has a bitwise
27412 // 'and not' instruction (the invert is free).
27413 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27414 // (X > -1) ? A : 0
27415 // (X > 0) ? X : 0 <-- This is canonical signed max.
27416 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27417 return SDValue();
27418 } else if (CC == ISD::SETLT) {
27419 // (X < 0) ? A : 0
27420 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27421 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27422 return SDValue();
27423 } else {
27424 return SDValue();
27425 }
27426
27427 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27428 // constant.
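// For example, with X:i32 and A == 8, ShCt is 32 - 3 - 1 == 28, so the result
// is and (srl X, 28), 8, which moves the sign bit of X into bit 3.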
27429 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27430 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27431 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27432 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27433 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
27434 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27435 AddToWorklist(Shift.getNode());
27436
27437 if (XType.bitsGT(AType)) {
27438 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27439 AddToWorklist(Shift.getNode());
27440 }
27441
27442 if (CC == ISD::SETGT)
27443 Shift = DAG.getNOT(DL, Shift, AType);
27444
27445 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27446 }
27447 }
27448
27449 unsigned ShCt = XType.getSizeInBits() - 1;
27450 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27451 return SDValue();
27452
27453 SDValue ShiftAmt = DAG.getShiftAmountConstant(ShCt, XType, DL);
27454 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27455 AddToWorklist(Shift.getNode());
27456
27457 if (XType.bitsGT(AType)) {
27458 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27459 AddToWorklist(Shift.getNode());
27460 }
27461
27462 if (CC == ISD::SETGT)
27463 Shift = DAG.getNOT(DL, Shift, AType);
27464
27465 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27466}
27467
27468// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27469SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27470 SDValue N0 = N->getOperand(0);
27471 SDValue N1 = N->getOperand(1);
27472 SDValue N2 = N->getOperand(2);
27473 SDLoc DL(N);
27474
27475 unsigned BinOpc = N1.getOpcode();
27476 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27477 (N1.getResNo() != N2.getResNo()))
27478 return SDValue();
27479
27480 // The use checks are intentionally on SDNode because we may be dealing
27481 // with opcodes that produce more than one SDValue.
27482 // TODO: Do we really need to check N0 (the condition operand of the select)?
27483 // But removing that clause could cause an infinite loop...
27484 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27485 return SDValue();
27486
27487 // Binops may include opcodes that return multiple values, so all values
27488 // must be created/propagated from the newly created binops below.
27489 SDVTList OpVTs = N1->getVTList();
27490
27491 // Fold select(cond, binop(x, y), binop(z, y))
27492 // --> binop(select(cond, x, z), y)
27493 if (N1.getOperand(1) == N2.getOperand(1)) {
27494 SDValue N10 = N1.getOperand(0);
27495 SDValue N20 = N2.getOperand(0);
27496 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27497 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27498 NewBinOp->setFlags(N1->getFlags());
27499 NewBinOp->intersectFlagsWith(N2->getFlags());
27500 return SDValue(NewBinOp.getNode(), N1.getResNo());
27501 }
27502
27503 // Fold select(cond, binop(x, y), binop(x, z))
27504 // --> binop(x, select(cond, y, z))
27505 if (N1.getOperand(0) == N2.getOperand(0)) {
27506 SDValue N11 = N1.getOperand(1);
27507 SDValue N21 = N2.getOperand(1);
27508 // Second op VT might be different (e.g. shift amount type)
27509 if (N11.getValueType() == N21.getValueType()) {
27510 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27511 SDValue NewBinOp =
27512 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27513 NewBinOp->setFlags(N1->getFlags());
27514 NewBinOp->intersectFlagsWith(N2->getFlags());
27515 return SDValue(NewBinOp.getNode(), N1.getResNo());
27516 }
27517 }
27518
27519 // TODO: Handle isCommutativeBinOp patterns as well?
27520 return SDValue();
27521}
27522
27523// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27524SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27525 SDValue N0 = N->getOperand(0);
27526 EVT VT = N->getValueType(0);
27527 bool IsFabs = N->getOpcode() == ISD::FABS;
27528 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27529
27530 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27531 return SDValue();
27532
27533 SDValue Int = N0.getOperand(0);
27534 EVT IntVT = Int.getValueType();
27535
27536 // The operand to cast should be integer.
27537 if (!IntVT.isInteger() || IntVT.isVector())
27538 return SDValue();
27539
27540 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27541 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27542 APInt SignMask;
27543 if (N0.getValueType().isVector()) {
27544 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27545 // 0x7f...) per element and splat it.
27546 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27547 if (IsFabs)
27548 SignMask = ~SignMask;
27549 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27550 } else {
27551 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27552 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27553 if (IsFabs)
27554 SignMask = ~SignMask;
27555 }
27556 SDLoc DL(N0);
27557 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27558 DAG.getConstant(SignMask, DL, IntVT));
27559 AddToWorklist(Int.getNode());
27560 return DAG.getBitcast(VT, Int);
27561}
27562
27563 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27564/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27565/// in it. This may be a win when the constant is not otherwise available
27566/// because it replaces two constant pool loads with one.
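/// Note that the array built below is laid out {FV, TV}, so a true condition
/// selects the element at offset sizeof(FV) and a false condition selects
/// offset 0.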
27567SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27568 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27569 ISD::CondCode CC) {
27570 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27571 return SDValue();
27572
27573 // If we are before legalize types, we want the other legalization to happen
27574 // first (for example, to avoid messing with soft float).
27575 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27576 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27577 EVT VT = N2.getValueType();
27578 if (!TV || !FV || !TLI.isTypeLegal(VT))
27579 return SDValue();
27580
27581 // If a constant can be materialized without loads, this does not make sense.
27582 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27583 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27584 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27585 return SDValue();
27586
27587 // If both constants have multiple uses, then we won't need to do an extra
27588 // load. The values are likely around in registers for other users.
27589 if (!TV->hasOneUse() && !FV->hasOneUse())
27590 return SDValue();
27591
27592 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27593 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27594 Type *FPTy = Elts[0]->getType();
27595 const DataLayout &TD = DAG.getDataLayout();
27596
27597 // Create a ConstantArray of the two constants.
27598 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27599 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27600 TD.getPrefTypeAlign(FPTy));
27601 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27602
27603 // Get offsets to the 0 and 1 elements of the array, so we can select between
27604 // them.
27605 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27606 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27607 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27608 SDValue Cond =
27609 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27610 AddToWorklist(Cond.getNode());
27611 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27612 AddToWorklist(CstOffset.getNode());
27613 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27614 AddToWorklist(CPIdx.getNode());
27615 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27616 MachinePointerInfo::getConstantPool(
27617 DAG.getMachineFunction()), Alignment);
27618}
27619
27620/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27621/// where 'cond' is the comparison specified by CC.
27622SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27623 SDValue N2, SDValue N3, ISD::CondCode CC,
27624 bool NotExtCompare) {
27625 // (x ? y : y) -> y.
27626 if (N2 == N3) return N2;
27627
27628 EVT CmpOpVT = N0.getValueType();
27629 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27630 EVT VT = N2.getValueType();
27631 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27632 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27633 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27634
27635 // Determine if the condition we're dealing with is constant.
27636 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27637 AddToWorklist(SCC.getNode());
27638 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27639 // fold select_cc true, x, y -> x
27640 // fold select_cc false, x, y -> y
27641 return !(SCCC->isZero()) ? N2 : N3;
27642 }
27643 }
27644
27645 if (SDValue V =
27646 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27647 return V;
27648
27649 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27650 return V;
27651
27652 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27653 // where y has a single bit set.
27654 // A plaintext description would be: we can turn the SELECT_CC into an AND
27655 // when the condition can be materialized as an all-ones register. Any
27656 // single bit-test can be materialized as an all-ones register with
27657 // shift-left and shift-right-arith.
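// For example, with x:i32 and y == 4: shift x left by countl_zero(4) == 29 to
// move the tested bit into the sign bit, arithmetic-shift right by 31 to get
// all-ones or zero, and AND the result with A.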
27658 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27659 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27660 SDValue AndLHS = N0->getOperand(0);
27661 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27662 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27663 // Shift the tested bit over the sign bit.
27664 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27665 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27666 unsigned ShCt = AndMask.getBitWidth() - 1;
27667 SDValue ShlAmt = DAG.getShiftAmountConstant(AndMask.countl_zero(), VT,
27668 SDLoc(AndLHS));
27669 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27670
27671 // Now arithmetic right shift it all the way over, so the result is
27672 // either all-ones, or zero.
27673 SDValue ShrAmt = DAG.getShiftAmountConstant(ShCt, VT, SDLoc(Shl));
27674 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27675
27676 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27677 }
27678 }
27679 }
27680
27681 // fold select C, 16, 0 -> shl C, 4
27682 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27683 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27684
27685 if ((Fold || Swap) &&
27686 TLI.getBooleanContents(CmpOpVT) ==
27687 TargetLowering::ZeroOrOneBooleanContent &&
27688 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27689
27690 if (Swap) {
27691 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27692 std::swap(N2C, N3C);
27693 }
27694
27695 // If the caller doesn't want us to simplify this into a zext of a compare,
27696 // don't do it.
27697 if (NotExtCompare && N2C->isOne())
27698 return SDValue();
27699
27700 SDValue Temp, SCC;
27701 // zext (setcc n0, n1)
27702 if (LegalTypes) {
27703 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27704 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27705 } else {
27706 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27707 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27708 }
27709
27710 AddToWorklist(SCC.getNode());
27711 AddToWorklist(Temp.getNode());
27712
27713 if (N2C->isOne())
27714 return Temp;
27715
27716 unsigned ShCt = N2C->getAPIntValue().logBase2();
27717 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27718 return SDValue();
27719
27720 // shl setcc result by log2 n2c
27721 return DAG.getNode(
27722 ISD::SHL, DL, N2.getValueType(), Temp,
27723 DAG.getShiftAmountConstant(ShCt, N2.getValueType(), SDLoc(Temp)));
27724 }
27725
27726 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27727 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27728 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27729 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27730 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27731 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27732 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27733 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27734 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27735 SDValue ValueOnZero = N2;
27736 SDValue Count = N3;
27738 // If the condition is NE instead of EQ, swap the operands.
27738 if (CC == ISD::SETNE)
27739 std::swap(ValueOnZero, Count);
27740 // Check if the value on zero is a constant equal to the bits in the type.
27741 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27742 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27743 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27744 // legal, combine to just cttz.
27745 if ((Count.getOpcode() == ISD::CTTZ ||
27746 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27747 N0 == Count.getOperand(0) &&
27748 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27749 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27750 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27751 // legal, combine to just ctlz.
27752 if ((Count.getOpcode() == ISD::CTLZ ||
27753 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27754 N0 == Count.getOperand(0) &&
27755 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27756 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27757 }
27758 }
27759 }
27760
27761 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27762 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27763 if (!NotExtCompare && N1C && N2C && N3C &&
27764 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27765 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27766 (N1C->isZero() && CC == ISD::SETLT)) &&
27767 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27768 SDValue ASR = DAG.getNode(
27769 ISD::SRA, DL, CmpOpVT, N0,
27770 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27771 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27772 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27773 }
27774
27775 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27776 return S;
27777 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27778 return S;
27779
27780 return SDValue();
27781}
27782
27783/// This is a stub for TargetLowering::SimplifySetCC.
27784SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27785 ISD::CondCode Cond, const SDLoc &DL,
27786 bool foldBooleans) {
27787 TargetLowering::DAGCombinerInfo
27788 DagCombineInfo(DAG, Level, false, this);
27789 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27790}
27791
27792/// Given an ISD::SDIV node expressing a divide by constant, return
27793/// a DAG expression to select that will generate the same value by multiplying
27794/// by a magic number.
27795/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
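/// For example, on a 32-bit type (sdiv X, 3) is typically expanded to a
/// multiply-high plus a sign correction, e.g.
/// (add (mulhs X, 0x55555556), (srl X, 31)), rather than a hardware divide.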
27796SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27797 // when optimising for minimum size, we don't want to expand a div to a mul
27798 // and a shift.
27799 if (DAG.getMachineFunction().getFunction().hasMinSize())
27800 return SDValue();
27801
27802 SmallVector<SDNode *, 8> Built;
27803 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
27804 for (SDNode *N : Built)
27805 AddToWorklist(N);
27806 return S;
27807 }
27808
27809 return SDValue();
27810}
27811
27812/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27813/// DAG expression that will generate the same value by right shifting.
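/// For example, (sdiv X, 4) with X:i32 typically becomes
/// (sra (add X, (srl (sra X, 31), 30)), 2), where the added bias makes the
/// shift round toward zero for negative X.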
27814SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27815 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27816 if (!C)
27817 return SDValue();
27818
27819 // Avoid division by zero.
27820 if (C->isZero())
27821 return SDValue();
27822
27823 SmallVector<SDNode *, 8> Built;
27824 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27825 for (SDNode *N : Built)
27826 AddToWorklist(N);
27827 return S;
27828 }
27829
27830 return SDValue();
27831}
27832
27833/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27834/// expression that will generate the same value by multiplying by a magic
27835/// number.
27836/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
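/// For example, on a 32-bit type (udiv X, 3) is typically expanded to
/// (srl (mulhu X, 0xAAAAAAAB), 1) rather than a hardware divide.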
27837SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27838 // when optimising for minimum size, we don't want to expand a div to a mul
27839 // and a shift.
27840 if (DAG.getMachineFunction().getFunction().hasMinSize())
27841 return SDValue();
27842
27843 SmallVector<SDNode *, 8> Built;
27844 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, LegalTypes, Built)) {
27845 for (SDNode *N : Built)
27846 AddToWorklist(N);
27847 return S;
27848 }
27849
27850 return SDValue();
27851}
27852
27853/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27854/// return a DAG expression that will generate the same value.
27855SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27856 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27857 if (!C)
27858 return SDValue();
27859
27860 // Avoid division by zero.
27861 if (C->isZero())
27862 return SDValue();
27863
27864 SmallVector<SDNode *, 8> Built;
27865 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27866 for (SDNode *N : Built)
27867 AddToWorklist(N);
27868 return S;
27869 }
27870
27871 return SDValue();
27872}
27873
27874// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27875//
27876// Returns the node that represents `Log2(Op)`. This may create a new node. If
27877 // we are unable to compute `Log2(Op)` it returns `SDValue()`.
27878//
27879// All nodes will be created at `DL` and the output will be of type `VT`.
27880//
27881// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27882 // `AssumeNonZero` if this function should simply assume (not require proving)
27883 // that `Op` is non-zero.
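// For example, Log2 of the constant 8 is the constant 3, and Log2(1 << Y)
// folds to Y via the shl rule below.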
27884 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27885 SDValue Op, unsigned Depth,
27886 bool AssumeNonZero) {
27887 assert(VT.isInteger() && "Only integer types are supported!");
27888
27889 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27890 while (true) {
27891 switch (V.getOpcode()) {
27892 case ISD::TRUNCATE:
27893 case ISD::ZERO_EXTEND:
27894 V = V.getOperand(0);
27895 break;
27896 default:
27897 return V;
27898 }
27899 }
27900 };
27901
27902 if (VT.isScalableVector())
27903 return SDValue();
27904
27905 Op = PeekThroughCastsAndTrunc(Op);
27906
27907 // Helper for determining whether a value is a power-2 constant scalar or a
27908 // vector of such elements.
27909 SmallVector<APInt> Pow2Constants;
27910 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27911 if (C->isZero() || C->isOpaque())
27912 return false;
27913 // TODO: We may also be able to support negative powers of 2 here.
27914 if (C->getAPIntValue().isPowerOf2()) {
27915 Pow2Constants.emplace_back(C->getAPIntValue());
27916 return true;
27917 }
27918 return false;
27919 };
27920
27921 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27922 if (!VT.isVector())
27923 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27924 // We need to create a build vector
27925 if (Op.getOpcode() == ISD::SPLAT_VECTOR)
27926 return DAG.getSplat(VT, DL,
27927 DAG.getConstant(Pow2Constants.back().logBase2(), DL,
27928 VT.getScalarType()));
27929 SmallVector<SDValue> Log2Ops;
27930 for (const APInt &Pow2 : Pow2Constants)
27931 Log2Ops.emplace_back(
27932 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27933 return DAG.getBuildVector(VT, DL, Log2Ops);
27934 }
27935
27936 if (Depth >= DAG.MaxRecursionDepth)
27937 return SDValue();
27938
27939 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27940 ToCast = PeekThroughCastsAndTrunc(ToCast);
27941 EVT CurVT = ToCast.getValueType();
27942 if (NewVT == CurVT)
27943 return ToCast;
27944
27945 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27946 return DAG.getBitcast(NewVT, ToCast);
27947
27948 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27949 };
27950
27951 // log2(X << Y) -> log2(X) + Y
27952 if (Op.getOpcode() == ISD::SHL) {
27953 // 1 << Y and X nuw/nsw << Y are all non-zero.
27954 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27955 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27956 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27957 Depth + 1, AssumeNonZero))
27958 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27959 CastToVT(VT, Op.getOperand(1)));
27960 }
27961
27962 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27963 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27964 Op.hasOneUse()) {
27965 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27966 Depth + 1, AssumeNonZero))
27967 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27968 Depth + 1, AssumeNonZero))
27969 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27970 }
27971
27972 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27973 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27974 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27975 Op.hasOneUse()) {
27976 // Use AssumeNonZero as false here. Otherwise we can hit a case where
27977 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27978 if (SDValue LogX =
27979 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27980 /*AssumeNonZero*/ false))
27981 if (SDValue LogY =
27982 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27983 /*AssumeNonZero*/ false))
27984 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27985 }
27986
27987 return SDValue();
27988}
27989
27990/// Determines the LogBase2 value for a non-null input value using the
27991/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
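/// For example, for V == 8 in an i32 type, ctlz(8) == 28, so
/// LogBase2(8) == 31 - 28 == 3.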
27992SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27993 bool KnownNonZero, bool InexpensiveOnly,
27994 std::optional<EVT> OutVT) {
27995 EVT VT = OutVT ? *OutVT : V.getValueType();
27996 SDValue InexpensiveLogBase2 =
27997 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27998 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27999 return InexpensiveLogBase2;
28000
28001 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
28002 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
28003 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
28004 return LogBase2;
28005}
28006
28007/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28008/// For the reciprocal, we need to find the zero of the function:
28009/// F(X) = 1/X - A [which has a zero at X = 1/A]
28010/// =>
28011/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
28012/// does not require additional intermediate precision]
28013/// For the last iteration, put numerator N into it to gain more precision:
28014/// Result = N X_i + X_i (N - N A X_i)
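/// For example, with A = 3.0 and an initial estimate X_0 = 0.3:
/// X_1 = 0.3 (2 - 3.0 * 0.3) = 0.33 and X_2 = 0.33 (2 - 3.0 * 0.33) = 0.3333,
/// converging toward 1/3.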
28015SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
28016 SDNodeFlags Flags) {
28017 if (LegalDAG)
28018 return SDValue();
28019
28020 // TODO: Handle extended types?
28021 EVT VT = Op.getValueType();
28022 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28023 VT.getScalarType() != MVT::f64)
28024 return SDValue();
28025
28026 // If estimates are explicitly disabled for this function, we're done.
28027 MachineFunction &MF = DAG.getMachineFunction();
28028 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
28029 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28030 return SDValue();
28031
28032 // Estimates may be explicitly enabled for this type with a custom number of
28033 // refinement steps.
28034 int Iterations = TLI.getDivRefinementSteps(VT, MF);
28035 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
28036 AddToWorklist(Est.getNode());
28037
28038 SDLoc DL(Op);
28039 if (Iterations) {
28040 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
28041
28042 // Newton iterations: Est = Est + Est (N - Arg * Est)
28043 // If this is the last iteration, also multiply by the numerator.
28044 for (int i = 0; i < Iterations; ++i) {
28045 SDValue MulEst = Est;
28046
28047 if (i == Iterations - 1) {
28048 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
28049 AddToWorklist(MulEst.getNode());
28050 }
28051
28052 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
28053 AddToWorklist(NewEst.getNode());
28054
28055 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
28056 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
28057 AddToWorklist(NewEst.getNode());
28058
28059 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28060 AddToWorklist(NewEst.getNode());
28061
28062 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
28063 AddToWorklist(Est.getNode());
28064 }
28065 } else {
28066 // If no iterations are available, multiply with N.
28067 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
28068 AddToWorklist(Est.getNode());
28069 }
28070
28071 return Est;
28072 }
28073
28074 return SDValue();
28075}
28076
28077/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28078/// For the reciprocal sqrt, we need to find the zero of the function:
28079/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28080/// =>
28081/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
28082/// As a result, we precompute A/2 prior to the iteration loop.
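/// For example, with A = 2.0 and an initial estimate X_0 = 0.7:
/// X_1 = 0.7 (1.5 - (2.0 / 2) * 0.7^2) = 0.7 * 1.01 = 0.707, converging toward
/// 1/sqrt(2) ~= 0.7071.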
28083SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
28084 unsigned Iterations,
28085 SDNodeFlags Flags, bool Reciprocal) {
28086 EVT VT = Arg.getValueType();
28087 SDLoc DL(Arg);
28088 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
28089
28090 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
28091 // this entire sequence requires only one FP constant.
28092 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
28093 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
28094
28095 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
28096 for (unsigned i = 0; i < Iterations; ++i) {
28097 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
28098 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
28099 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28100 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28101 }
28102
28103 // If non-reciprocal square root is requested, multiply the result by Arg.
28104 if (!Reciprocal)
28105 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28106
28107 return Est;
28108}
28109
28110/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28111/// For the reciprocal sqrt, we need to find the zero of the function:
28112/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28113/// =>
28114/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
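/// For example, with A = 2.0 and an initial estimate X_0 = 0.7:
/// X_1 = (-0.5 * 0.7) * (2.0 * 0.7 * 0.7 - 3.0) = (-0.35) * (-2.02) = 0.707,
/// the same value the one-constant form produces.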
28115SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28116 unsigned Iterations,
28117 SDNodeFlags Flags, bool Reciprocal) {
28118 EVT VT = Arg.getValueType();
28119 SDLoc DL(Arg);
28120 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28121 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28122
28123 // This routine must enter the loop below to work correctly
28124 // when (Reciprocal == false).
28125 assert(Iterations > 0);
28126
28127 // Newton iterations for reciprocal square root:
28128 // E = (E * -0.5) * ((A * E) * E + -3.0)
28129 for (unsigned i = 0; i < Iterations; ++i) {
28130 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28131 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28132 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28133
28134 // When calculating a square root at the last iteration build:
28135 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28136 // (notice a common subexpression)
28137 SDValue LHS;
28138 if (Reciprocal || (i + 1) < Iterations) {
28139 // RSQRT: LHS = (E * -0.5)
28140 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28141 } else {
28142 // SQRT: LHS = (A * E) * -0.5
28143 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28144 }
28145
28146 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28147 }
28148
28149 return Est;
28150}
28151
28152/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28153/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28154/// Op can be zero.
28155SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28156 bool Reciprocal) {
28157 if (LegalDAG)
28158 return SDValue();
28159
28160 // TODO: Handle extended types?
28161 EVT VT = Op.getValueType();
28162 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28163 VT.getScalarType() != MVT::f64)
28164 return SDValue();
28165
28166 // If estimates are explicitly disabled for this function, we're done.
28167 MachineFunction &MF = DAG.getMachineFunction();
28168 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28169 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28170 return SDValue();
28171
28172 // Estimates may be explicitly enabled for this type with a custom number of
28173 // refinement steps.
28174 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28175
28176 bool UseOneConstNR = false;
28177 if (SDValue Est =
28178 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28179 Reciprocal)) {
28180 AddToWorklist(Est.getNode());
28181
28182 if (Iterations > 0)
28183 Est = UseOneConstNR
28184 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28185 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28186 if (!Reciprocal) {
28187 SDLoc DL(Op);
28188 // Try the target specific test first.
28189 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28190
28191 // The estimate is now completely wrong if the input was exactly 0.0 or
28192 // possibly a denormal. Force the answer to 0.0 or the value provided by the
28193 // target for those cases.
28194 Est = DAG.getNode(
28195 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28196 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28197 }
28198 return Est;
28199 }
28200
28201 return SDValue();
28202}
28203
28204SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28205 return buildSqrtEstimateImpl(Op, Flags, true);
28206}
28207
28208SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28209 return buildSqrtEstimateImpl(Op, Flags, false);
28210}
28211
28212/// Return true if there is any possibility that the two addresses overlap.
28213bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28214
28215 struct MemUseCharacteristics {
28216 bool IsVolatile;
28217 bool IsAtomic;
28218 SDValue BasePtr;
28219 int64_t Offset;
28220 LocationSize NumBytes;
28221 MachineMemOperand *MMO;
28222 };
28223
28224 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28225 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28226 int64_t Offset = 0;
28227 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28228 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28229 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28230 ? -1 * C->getSExtValue()
28231 : 0;
28232 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28233 return {LSN->isVolatile(), LSN->isAtomic(),
28234 LSN->getBasePtr(), Offset /*base offset*/,
28235 LocationSize::precise(Size), LSN->getMemOperand()};
28236 }
28237 if (const auto *LN = cast<LifetimeSDNode>(N))
28238 return {false /*isVolatile*/,
28239 /*isAtomic*/ false,
28240 LN->getOperand(1),
28241 (LN->hasOffset()) ? LN->getOffset() : 0,
28242 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28243 : LocationSize::beforeOrAfterPointer(),
28244 (MachineMemOperand *)nullptr};
28245 // Default.
28246 return {false /*isvolatile*/,
28247 /*isAtomic*/ false,
28248 SDValue(),
28249 (int64_t)0 /*offset*/,
28250 LocationSize::beforeOrAfterPointer() /*size*/,
28251 (MachineMemOperand *)nullptr};
28252 };
28253
28254 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28255 MUC1 = getCharacteristics(Op1);
28256
28257 // If they are to the same address, then they must be aliases.
28258 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28259 MUC0.Offset == MUC1.Offset)
28260 return true;
28261
28262 // If they are both volatile then they cannot be reordered.
28263 if (MUC0.IsVolatile && MUC1.IsVolatile)
28264 return true;
28265
28266 // Be conservative about atomics for the moment
28267 // TODO: This is way overconservative for unordered atomics (see D66309)
28268 if (MUC0.IsAtomic && MUC1.IsAtomic)
28269 return true;
28270
28271 if (MUC0.MMO && MUC1.MMO) {
28272 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28273 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28274 return false;
28275 }
28276
28277 // If NumBytes is scalable and offset is not 0, conservatively return may
28278 // alias
28279 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28280 MUC0.Offset != 0) ||
28281 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28282 MUC1.Offset != 0))
28283 return true;
28284 // Try to prove that there is aliasing, or that there is no aliasing. Either
28285 // way, we can return now. If nothing can be proved, proceed with more tests.
28286 bool IsAlias;
28287 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28288 DAG, IsAlias))
28289 return IsAlias;
28290
28291 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28292 // either is not known.
28293 if (!MUC0.MMO || !MUC1.MMO)
28294 return true;
28295
28296 // If one operation reads from invariant memory, and the other may store, they
28297 // cannot alias. These should really be checking the equivalent of mayWrite,
28298 // but it only matters for memory nodes other than load/store.
28299 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28300 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28301 return false;
28302
28303 // If we know that SrcValue1 and SrcValue2 have relatively large alignment
28304 // compared to the size and offset of the access, we may be able to prove
28305 // that they do not alias. This check is conservative for now to catch cases
28306 // created by splitting vector types; it only works when the offsets are
28307 // multiples of the size of the data.
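// Worked example (illustrative): two 4-byte accesses from a 16-byte-aligned
// base at source offsets 4 and 8 give OffAlign0 = 4 and OffAlign1 = 8; since
// 4 + 4 <= 8 the accesses cannot overlap and the function returns false.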
28308 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28309 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28310 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28311 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28312 LocationSize Size0 = MUC0.NumBytes;
28313 LocationSize Size1 = MUC1.NumBytes;
28314
28315 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28316 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28317 !Size1.isScalable() && Size0 == Size1 &&
28318 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28319 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28320 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28321 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28322 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28323
28324 // There is no overlap between these relatively aligned accesses of
28325 // similar size. Return no alias.
28326 if ((OffAlign0 + static_cast<int64_t>(
28327 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28328 (OffAlign1 + static_cast<int64_t>(
28329 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28330 return false;
28331 }
28332
28333 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28334 ? CombinerGlobalAA
28335 : DAG.getSubtarget().useAA();
28336#ifndef NDEBUG
28337 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28338 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28339 UseAA = false;
28340#endif
28341
28342 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28343 Size0.hasValue() && Size1.hasValue() &&
28344 // Can't represent a scalable size + fixed offset in LocationSize
28345 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28346 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28347 // Use alias analysis information.
28348 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28349 int64_t Overlap0 =
28350 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28351 int64_t Overlap1 =
28352 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28353 LocationSize Loc0 =
28354 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28355 LocationSize Loc1 =
28356 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
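    // Worked example (illustrative): 4-byte accesses at source offsets 8 and
    // 12 give MinOffset = 8, Overlap0 = 4 + 8 - 8 = 4 and
    // Overlap1 = 4 + 12 - 8 = 8; these are the sizes handed to the AA query.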
28357 if (AA->isNoAlias(
28358 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28359 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28360 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28361 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28362 return false;
28363 }
28364
28365 // Otherwise we have to assume they alias.
28366 return true;
28367}
28368
28369/// Walk up chain skipping non-aliasing memory nodes,
28370/// looking for aliasing nodes and adding them to the Aliases vector.
28371void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28372 SmallVectorImpl<SDValue> &Aliases) {
28373 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28374 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28375
28376 // Get alias information for node.
28377 // TODO: relax aliasing for unordered atomics (see D66309)
28378 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28379
28380 // Starting off.
28381 Chains.push_back(OriginalChain);
28382 unsigned Depth = 0;
28383
28384 // Attempt to improve chain by a single step
28385 auto ImproveChain = [&](SDValue &C) -> bool {
28386 switch (C.getOpcode()) {
28387 case ISD::EntryToken:
28388 // No need to mark EntryToken.
28389 C = SDValue();
28390 return true;
28391 case ISD::LOAD:
28392 case ISD::STORE: {
28393 // Get alias information for C.
28394 // TODO: Relax aliasing for unordered atomics (see D66309)
28395 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28396 cast<LSBaseSDNode>(C.getNode())->isSimple();
28397 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28398 // Look further up the chain.
28399 C = C.getOperand(0);
28400 return true;
28401 }
28402 // Alias, so stop here.
28403 return false;
28404 }
28405
28406 case ISD::CopyFromReg:
28407 // Always forward past CopyFromReg.
28408 C = C.getOperand(0);
28409 return true;
28410
28411 case ISD::LIFETIME_START:
28412 case ISD::LIFETIME_END: {
28413 // We can forward past any lifetime start/end that can be proven not to
28414 // alias the memory access.
28415 if (!mayAlias(N, C.getNode())) {
28416 // Look further up the chain.
28417 C = C.getOperand(0);
28418 return true;
28419 }
28420 return false;
28421 }
28422 default:
28423 return false;
28424 }
28425 };
28426
28427 // Look at each chain and determine if it is an alias. If so, add it to the
28428 // aliases list. If not, then continue up the chain looking for the next
28429 // candidate.
28430 while (!Chains.empty()) {
28431 SDValue Chain = Chains.pop_back_val();
28432
28433 // Don't bother if we've seen Chain before.
28434 if (!Visited.insert(Chain.getNode()).second)
28435 continue;
28436
28437 // For TokenFactor nodes, look at each operand and only continue up the
28438 // chain until we reach the depth limit.
28439 //
28440 // FIXME: The depth check could be made to return the last non-aliasing
28441 // chain we found before we hit a tokenfactor rather than the original
28442 // chain.
28443 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28444 Aliases.clear();
28445 Aliases.push_back(OriginalChain);
28446 return;
28447 }
28448
28449 if (Chain.getOpcode() == ISD::TokenFactor) {
28450 // We have to check each of the operands of the token factor for "small"
28451 // token factors, so we queue them up. Adding the operands to the queue
28452 // (stack) in reverse order maintains the original order and increases the
28453 // likelihood that getNode will find a matching token factor (CSE.)
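      // E.g. operands [Op0, Op1, Op2] are pushed as Op2, Op1, Op0, so the
      // enclosing loop pops them back off in the original order Op0, Op1, Op2.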
28454 if (Chain.getNumOperands() > 16) {
28455 Aliases.push_back(Chain);
28456 continue;
28457 }
28458 for (unsigned n = Chain.getNumOperands(); n;)
28459 Chains.push_back(Chain.getOperand(--n));
28460 ++Depth;
28461 continue;
28462 }
28463 // Everything else
28464 if (ImproveChain(Chain)) {
28465 // An updated chain was found; consider the new chain if one exists.
28466 if (Chain.getNode())
28467 Chains.push_back(Chain);
28468 ++Depth;
28469 continue;
28470 }
28471 // No improved chain is possible, so treat it as an alias.
28472 Aliases.push_back(Chain);
28473 }
28474}
28475
28476/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28477/// (aliasing node.)
28478SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28479 if (OptLevel == CodeGenOptLevel::None)
28480 return OldChain;
28481
28482 // Ops for replacing token factor.
28483 SmallVector<SDValue, 8> Aliases;
28484
28485 // Accumulate all the aliases to this node.
28486 GatherAllAliases(N, OldChain, Aliases);
28487
28488 // If no operands then chain to entry token.
28489 if (Aliases.empty())
28490 return DAG.getEntryNode();
28491
28492 // If a single operand then chain to it. We don't need to revisit it.
28493 if (Aliases.size() == 1)
28494 return Aliases[0];
28495
28496 // Construct a custom tailored token factor.
28497 return DAG.getTokenFactor(SDLoc(N), Aliases);
28498}
28499
28500// This function tries to collect a bunch of potentially interesting
28501// nodes to improve the chains of, all at once. This might seem
28502// redundant, as this function gets called when visiting every store
28503// node, so why not let the work be done on each store as it's visited?
28504//
28505// I believe this is mainly important because mergeConsecutiveStores
28506// is unable to deal with merging stores of different sizes, so unless
28507// we improve the chains of all the potential candidates up-front
28508// before running mergeConsecutiveStores, it might only see some of
28509// the nodes that will eventually be candidates, and then not be able
28510// to go from a partially-merged state to the desired final
28511// fully-merged state.
28512
28513bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28514 SmallVector<StoreSDNode *, 8> ChainedStores;
28515 StoreSDNode *STChain = St;
28516 // Intervals records which offsets from BaseIndex have been covered. In
28517 // the common case, every store writes to the immediately previous address
28518 // range and is thus merged with the previous interval at insertion time.
28519
28520 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28521 IntervalMapHalfOpenInfo<int64_t>>;
28522 IMap::Allocator A;
28523 IMap Intervals(A);
28524
28525 // This holds the base pointer, index, and the offset in bytes from the base
28526 // pointer.
28527 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28528
28529 // We must have a base and an offset.
28530 if (!BasePtr.getBase().getNode())
28531 return false;
28532
28533 // Do not handle stores to undef base pointers.
28534 if (BasePtr.getBase().isUndef())
28535 return false;
28536
28537 // Do not handle stores to opaque types
28538 if (St->getMemoryVT().isZeroSized())
28539 return false;
28540
28541 // BaseIndexOffset assumes that offsets are fixed-size, which
28542 // is not valid for scalable vectors where the offsets are
28543 // scaled by `vscale`, so bail out early.
28544 if (St->getMemoryVT().isScalableVT())
28545 return false;
28546
28547 // Add ST's interval.
28548 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28549 std::monostate{});
28550
28551 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28552 if (Chain->getMemoryVT().isScalableVector())
28553 return false;
28554
28555 // If the chain has more than one use, then we can't reorder the mem ops.
28556 if (!SDValue(Chain, 0)->hasOneUse())
28557 break;
28558 // TODO: Relax for unordered atomics (see D66309)
28559 if (!Chain->isSimple() || Chain->isIndexed())
28560 break;
28561
28562 // Find the base pointer and offset for this memory node.
28563 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28564 // Check that the base pointer is the same as the original one.
28565 int64_t Offset;
28566 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28567 break;
28568 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28569 // Make sure we don't overlap with other intervals by checking the ones to
28570 // the left or right before inserting.
28571 auto I = Intervals.find(Offset);
28572 // If there's a next interval, we should end before it.
28573 if (I != Intervals.end() && I.start() < (Offset + Length))
28574 break;
28575 // If there's a previous interval, we should start after it.
28576 if (I != Intervals.begin() && (--I).stop() <= Offset)
28577 break;
28578 Intervals.insert(Offset, Offset + Length, std::monostate{});
28579
28580 ChainedStores.push_back(Chain);
28581 STChain = Chain;
28582 }
28583
28584 // If we didn't find a chained store, exit.
28585 if (ChainedStores.empty())
28586 return false;
28587
28588 // Improve all chained stores (St and ChainedStores members) starting from
28589 // where the store chain ended and return single TokenFactor.
28590 SDValue NewChain = STChain->getChain();
28591 SmallVector<SDValue, 8> TFOps;
28592 for (unsigned I = ChainedStores.size(); I;) {
28593 StoreSDNode *S = ChainedStores[--I];
28594 SDValue BetterChain = FindBetterChain(S, NewChain);
28595 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28596 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28597 TFOps.push_back(SDValue(S, 0));
28598 ChainedStores[I] = S;
28599 }
28600
28601 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28602 SDValue BetterChain = FindBetterChain(St, NewChain);
28603 SDValue NewST;
28604 if (St->isTruncatingStore())
28605 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28606 St->getBasePtr(), St->getMemoryVT(),
28607 St->getMemOperand());
28608 else
28609 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28610 St->getBasePtr(), St->getMemOperand());
28611
28612 TFOps.push_back(NewST);
28613
28614 // If we improved every element of TFOps, then we've lost the dependence on
28615 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28616 // the beginning to keep relative order consistent with FindBetterChains.
28617 auto hasImprovedChain = [&](SDValue ST) -> bool {
28618 return ST->getOperand(0) != NewChain;
28619 };
28620 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28621 if (AddNewChain)
28622 TFOps.insert(TFOps.begin(), NewChain);
28623
28624 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28625 CombineTo(St, TF);
28626
28627 // Add TF and its operands to the worklist.
28628 AddToWorklist(TF.getNode());
28629 for (const SDValue &Op : TF->ops())
28630 AddToWorklist(Op.getNode());
28631 AddToWorklist(STChain);
28632 return true;
28633}
28634
28635bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28636 if (OptLevel == CodeGenOptLevel::None)
28637 return false;
28638
28639 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28640
28641 // We must have a base and an offset.
28642 if (!BasePtr.getBase().getNode())
28643 return false;
28644
28645 // Do not handle stores to undef base pointers.
28646 if (BasePtr.getBase().isUndef())
28647 return false;
28648
28649 // Directly improve a chain of disjoint stores starting at St.
28650 if (parallelizeChainedStores(St))
28651 return true;
28652
28653 // Improve St's chain.
28654 SDValue BetterChain = FindBetterChain(St, St->getChain());
28655 if (St->getChain() != BetterChain) {
28656 replaceStoreChain(St, BetterChain);
28657 return true;
28658 }
28659 return false;
28660}
28661
28662/// This is the entry point for the file.
28663void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28664 CodeGenOptLevel OptLevel) {
28665 /// This is the main entry point to this class.
28666 DAGCombiner(*this, AA, OptLevel).Run(Level);
28667}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG, const SDLoc &DL)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const SDNodeFlags Flags, const TargetLowering &TLI)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:531
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:1125
bool isNegative() const
Definition: APFloat.h:1360
bool isNormal() const
Definition: APFloat.h:1364
bool isDenormal() const
Definition: APFloat.h:1361
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1343
const fltSemantics & getSemantics() const
Definition: APFloat.h:1368
bool isNaN() const
Definition: APFloat.h:1358
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition: APFloat.h:1003
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
bool isLargest() const
Definition: APFloat.h:1376
bool isIEEE() const
Definition: APFloat.h:1378
bool isInfinity() const
Definition: APFloat.h:1357
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:429
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:209
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1629
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1472
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
APInt abs() const
Get the absolute value.
Definition: APInt.h:1753
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:351
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1162
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:238
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:360
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:446
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1091
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:189
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:309
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1229
int32_t exactLogBase2() const
Definition: APInt.h:1741
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1598
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1557
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1491
unsigned countLeadingZeros() const
Definition: APInt.h:1565
unsigned logBase2() const
Definition: APInt.h:1719
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:490
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:455
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:451
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:468
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1130
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:420
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:286
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:276
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:369
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:266
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1615
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1201
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:635
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1292
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:109
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:214
bool isBigEndian() const
Definition: DataLayout.h:215
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:480
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:881
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
iterator end()
Definition: DenseMap.h:84
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:311
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
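As a small illustration of the SDValue accessors listed above (a hypothetical helper, not code from this file), the usual pattern for scanning a node's operands looks like this:
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
static bool hasConstantOperand(SDValue N) {
  // Walk every operand of the node behind N and look for a ConstantSDNode.
  for (unsigned I = 0, E = N.getNumOperands(); I != E; ++I)
    if (isa<ConstantSDNode>(N.getOperand(I)))
      return true;
  return false;
}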
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Helper to insert SDNodeFlags automatically when transforming.
Definition: SelectionDAG.h:366
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:970
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:569
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:491
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
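A minimal sketch of how getSetCC is typically paired with getSetCCResultType and getConstant; the helper name and the X != 0 check are assumptions, not a transform from this pass:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue buildIsNonZero(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT OpVT = X.getValueType();
  // Ask the target what type a setcc result has for this operand type.
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), OpVT);
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  return DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETNE); // X != 0
}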
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known to be positive.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
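For example, getNOT is the usual way to spell the trailing XOR when applying De Morgan's law; the helper below is a hedged sketch, not a rewrite taken from this file:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue buildDeMorganAnd(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                SDValue A, SDValue B) {
  // ~A & ~B == ~(A | B): build the OR, then let getNOT add the XOR with -1.
  SDValue Or = DAG.getNode(ISD::OR, DL, VT, A, B);
  return DAG.getNOT(DL, Or, VT);
}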
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:495
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:453
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:844
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:489
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
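A sketch of the usual pairing of getConstant with getNode when reassociating constants; the helper and its preconditions are assumptions (the caller is expected to have matched C1 and C2, of equal width, from ConstantSDNodes):
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue reassocAddOfConstants(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue X, const APInt &C1,
                                     const APInt &C2) {
  EVT VT = X.getValueType();
  // (add (add X, C1), C2) -> (add X, (C1 + C2))
  SDValue Sum = DAG.getConstant(C1 + C2, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, X, Sum);
}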
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:677
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
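A minimal sketch pairing the getLoad and getStore helpers listed here; Chain, Ptr, PtrInfo and Alignment are assumed to come from the surrounding lowering code, and the read-modify-write itself is purely illustrative:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue loadIncStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                            SDValue Ptr, MachinePointerInfo PtrInfo,
                            Align Alignment) {
  EVT VT = MVT::i32;
  SDValue Ld = DAG.getLoad(VT, DL, Chain, Ptr, PtrInfo, Alignment);
  SDValue Inc =
      DAG.getNode(ISD::ADD, DL, VT, Ld, DAG.getConstant(1, DL, VT));
  // Ld.getValue(1) is the load's output chain; thread it into the store.
  return DAG.getStore(Ld.getValue(1), DL, Inc, Ptr, PtrInfo, Alignment);
}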
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:878
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:490
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:561
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:496
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:484
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:861
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
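A hedged sketch of how these two queries compose; it mirrors the haveNoCommonBitsSet/isADDLike checks listed above but is not code from this file:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static bool orIsAddLike(SelectionDAG &DAG, SDValue A, SDValue B) {
  // Bits not known zero in A may be one; if B is known zero in all of those
  // positions, A and B share no set bits and (or A, B) acts like (add A, B).
  KnownBits KA = DAG.computeKnownBits(A);
  return DAG.MaskedValueIsZero(B, ~KA.Zero);
}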
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
Definition: SelectionDAG.h:502
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:578
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
std::optional< bool > isBoolConstant(SDValue N, bool AllowTruncation=false) const
Check if a value N is a constant using the target's BooleanContent for its type.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:572
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:894
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:924
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
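A small sketch combining commuteMask with getVectorShuffle (listed above) to swap shuffle operands without changing the result; the helper name is hypothetical:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue commuteShuffle(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              SDValue N0, SDValue N1, ArrayRef<int> Mask) {
  SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
  // Rewrite each mask element as if the two source vectors traded places.
  ShuffleVectorSDNode::commuteMask(NewMask);
  return DAG.getVectorShuffle(VT, DL, N1, N0, NewMask);
}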
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:96
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:347
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:442
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:718
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
void reserve(size_type N)
Definition: SmallVector.h:677
iterator erase(const_iterator CI)
Definition: SmallVector.h:751
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:697
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:819
void resize(size_type N)
Definition: SmallVector.h:652
void push_back(const T &Elt)
Definition: SmallVector.h:427
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:300
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
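A minimal sketch of the visited-set worklist idiom these containers support (names are illustrative; it assumes the SDNode API listed earlier on this page):
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
static unsigned countReachableNodes(const SDNode *Root) {
  SmallPtrSet<const SDNode *, 16> Visited;
  SmallVector<const SDNode *, 8> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const SDNode *N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // insert().second is false when N was already visited
    for (SDValue Op : N->op_values())
      Worklist.push_back(Op.getNode());
  }
  return Visited.size();
}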
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
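A sketch of the standard legality guard before creating a new node in a combine; the specific ISD::ABS example is an assumption, not drawn from this file:
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
static SDValue tryBuildABS(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT VT = X.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Only form ISD::ABS if the target handles it for VT (legally or via
  // custom lowering); otherwise bail out and keep the original pattern.
  if (!TLI.isOperationLegalOrCustom(ISD::ABS, VT))
    return SDValue();
  return DAG.getNode(ISD::ABS, DL, VT, X);
}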
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Return true if it is reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent a VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent a VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:254
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2197
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2202
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2207
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2212
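A tiny sketch using the signed helpers above to clamp a constant into a range; all three APInts are assumed to share a bit width, and the function name is illustrative:
#include "llvm/ADT/APInt.h"
using namespace llvm;
static APInt clampSigned(const APInt &X, const APInt &Lo, const APInt &Hi) {
  // smax(X, Lo) raises X to at least Lo; smin(..., Hi) then caps it at Hi.
  return APIntOps::smin(APIntOps::smax(X, Lo), Hi);
}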
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Entry
Definition: COFF.h:826
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
Definition: ISDOpcodes.h:1330
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1415
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1284
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:501
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:820
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1400
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1404
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:850
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1414
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:943
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1455
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1397
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1401
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:787
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:660
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1280
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1416
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1409
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1021
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ TargetConstantFP
Definition: ISDOpcodes.h:165
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:886
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1372
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1008
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:338
@ TargetFrameIndex
Definition: ISDOpcodes.h:172
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:839
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1347
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
Definition: ISDOpcodes.h:1342
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1234
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1417
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:952
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1027
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1050
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:267
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1398
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:981
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition: ISDOpcodes.h:668
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:861
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:885
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1405
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1183
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1055
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:320
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1544
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1654
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
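A hedged usage sketch, assuming an SDValue Op from surrounding combiner code (the lambda is illustrative): check that Op is a constant, or a constant vector whose defined elements are all powers of two.
bool AllPow2 = ISD::matchUnaryPredicate(
    Op, [](ConstantSDNode *C) { return C->getAPIntValue().isPowerOf2(); },
    /*AllowUndefs=*/true);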
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1629
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1649
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1470
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
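A brief sketch of the two condition-code helpers above, assuming CC is an ISD::CondCode and VT the EVT being compared (variable names are illustrative):
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(CC); // valid for (setcc Y, X)
ISD::CondCode InverseCC = ISD::getSetCCInverse(CC, VT);     // valid for !(setcc X, Y)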
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1540
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1540
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1611
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1527
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1578
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1623
@ VecLoad
Definition: NVPTX.h:89
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
specificval_ty m_Specific(const Value *V)
Match only the given specific value.
Definition: PatternMatch.h:875
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:612
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
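A minimal sketch of the IR-level matchers listed above (llvm::PatternMatch, which operates on llvm::Value, not on SelectionDAG nodes); V is an assumed Value* and the pattern is illustrative:
using namespace llvm::PatternMatch;
Value *X, *Y;
if (match(V, m_Add(m_Shl(m_Value(X), m_One()), m_Value(Y)))) {
  // V computes (X << 1) + Y; X and Y are now bound for reuse.
}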
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
Opcode_match m_Opc(unsigned Opcode)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS > m_Sra(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
TernaryOpc_match< T0_P, T1_P, T2_P > m_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
NUses_match< 1, Value_match > m_OneUse()
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
bool sd_context_match(SDValue N, const MatchContext &Ctx, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or splat of an integer constant.
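A minimal sketch of the SelectionDAG-level matchers listed above, assuming N is the SDNode being visited and DAG the current SelectionDAG; the m_Value(SDValue &) binders are assumed from llvm::SDPatternMatch and the pattern itself is illustrative:
SDValue LHS, RHS;
if (sd_match(N, &DAG, m_SetCC(m_Value(LHS), m_Value(RHS),
                              m_SpecificCondCode(ISD::SETEQ)))) {
  // N is (setcc LHS, RHS, seteq); LHS and RHS are bound for reuse.
}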
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4645
constexpr double e
Definition: MathExtras.h:47
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:353
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1553
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2406
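A short sketch of the range helpers above, assuming Ops is a SmallVector<SDValue> from surrounding code:
bool AllUndef = all_of(Ops, [](SDValue V) { return V.isUndef(); });
for (auto [Idx, V] : enumerate(Ops)) {
  // Idx is the element position, V the SDValue at that position.
}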
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2062
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt & operator+=(DynamicAPInt &A, int64_t B)
Definition: DynamicAPInt.h:516
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
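An illustrative sketch of the mask-widening helper above: a v4 mask that selects two adjacent pairs can be rewritten as a v2 mask over elements twice as wide (values are examples only):
SmallVector<int, 8> Mask = {0, 1, 4, 5};
SmallVector<int, 4> WideMask;
if (widenShuffleMaskElts(/*Scale=*/2, Mask, WideMask)) {
  // WideMask == {0, 2}; narrowShuffleMaskElts(2, WideMask, ...) reverses this.
}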
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1535
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
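Illustrative values for the integer log helpers above:
unsigned FloorLog = Log2_32(24);       // 4, since 16 <= 24 < 32
unsigned CeilLog  = Log2_32_Ceil(24);  // 5, since 24 <= 32
bool Pow2         = isPowerOf2_32(24); // false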
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1503
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
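A hedged usage sketch, assuming Op is an SDValue from surrounding combiner code:
if (ConstantSDNode *C = isConstOrConstSplat(Op, /*AllowUndefs=*/false)) {
  const APInt &CVal = C->getAPIntValue();
  // CVal is the scalar value, or the splatted value when Op is a vector.
}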
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2045
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
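A small sketch of the alignment helpers above (values are illustrative):
Align A(16);
Align AtOffset    = commonAlignment(A, /*Offset=*/24); // Align(8): 24 is only 8-byte aligned
unsigned ShiftAmt = Log2(AtOffset);                    // 3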
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:337
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:333
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:329
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:270
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:343
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
bool knownBitsLE(EVT VT) const
Return true if we know at compile time this has fewer than or the same bits as VT.
Definition: ValueTypes.h:269
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
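A brief sketch of constructing EVTs with the factory functions above, assuming DAG is the current SelectionDAG and VT an integer EVT from surrounding code:
LLVMContext &Ctx = *DAG.getContext();
EVT HalfVT = EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() / 2);
EVT VecVT  = EVT::getVectorVT(Ctx, HalfVT, /*NumElements=*/4);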
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition: ValueTypes.h:258
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size, this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:285
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:79
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:56
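A hedged sketch of querying the KnownBits interface above, assuming Op is an SDValue and DAG the current SelectionDAG:
KnownBits Known = DAG.computeKnownBits(Op);
if (Known.isNonNegative() && Known.countMinTrailingZeros() >= 2) {
  // Op is provably a non-negative multiple of 4.
}
if (Known.isConstant()) {
  const APInt &Val = Known.getConstant(); // every bit of Op is known
}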
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:312
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...