DAGCombiner.cpp
1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92STATISTIC(SlicedLoads, "Number of load sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106#ifndef NDEBUG
107static cl::opt<std::string>
108CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
130 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
134 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
139 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
144 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
149 "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
175 SmallVector<SDNode *, 64> Worklist;
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
182 DenseMap<SDNode *, unsigned> WorklistMap;
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. Because we do not add duplicate nodes
185 /// to the worklist, this is different from the tail of the worklist.
187 SmallSetVector<SDNode *, 32> PruningList;
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
201 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the work lists because they might get more simplified now.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its users to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist, making sure its instance is at the back (next to be
275 /// processed).
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
336 APInt DemandedBits = APInt::getAllOnes(BitWidth);
337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
343 ? APInt::getAllOnes(VT.getVectorNumElements())
344 : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it,
375 // so that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success, SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
404 ISD::CondCode CC);
405
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitLIFETIME_END(SDNode *N);
534 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
535 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
536 SDValue visitBUILD_VECTOR(SDNode *N);
537 SDValue visitCONCAT_VECTORS(SDNode *N);
538 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
539 SDValue visitVECTOR_SHUFFLE(SDNode *N);
540 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
541 SDValue visitINSERT_SUBVECTOR(SDNode *N);
542 SDValue visitMLOAD(SDNode *N);
543 SDValue visitMSTORE(SDNode *N);
544 SDValue visitMGATHER(SDNode *N);
545 SDValue visitMSCATTER(SDNode *N);
546 SDValue visitVPGATHER(SDNode *N);
547 SDValue visitVPSCATTER(SDNode *N);
548 SDValue visitVP_STRIDED_LOAD(SDNode *N);
549 SDValue visitVP_STRIDED_STORE(SDNode *N);
550 SDValue visitFP_TO_FP16(SDNode *N);
551 SDValue visitFP16_TO_FP(SDNode *N);
552 SDValue visitFP_TO_BF16(SDNode *N);
553 SDValue visitBF16_TO_FP(SDNode *N);
554 SDValue visitVECREDUCE(SDNode *N);
555 SDValue visitVPOp(SDNode *N);
556 SDValue visitGET_FPENV_MEM(SDNode *N);
557 SDValue visitSET_FPENV_MEM(SDNode *N);
558
559 template <class MatchContextClass>
560 SDValue visitFADDForFMACombine(SDNode *N);
561 template <class MatchContextClass>
562 SDValue visitFSUBForFMACombine(SDNode *N);
563 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
564
565 SDValue XformToShuffleWithZero(SDNode *N);
566 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
567 const SDLoc &DL,
568 SDNode *N,
569 SDValue N0,
570 SDValue N1);
571 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
572 SDValue N1, SDNodeFlags Flags);
573 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
574 SDValue N1, SDNodeFlags Flags);
575 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
576 EVT VT, SDValue N0, SDValue N1,
577 SDNodeFlags Flags = SDNodeFlags());
578
579 SDValue visitShiftByConstant(SDNode *N);
580
581 SDValue foldSelectOfConstants(SDNode *N);
582 SDValue foldVSelectOfConstants(SDNode *N);
583 SDValue foldBinOpIntoSelect(SDNode *BO);
584 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
585 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
586 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
587 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
588 SDValue N2, SDValue N3, ISD::CondCode CC,
589 bool NotExtCompare = false);
590 SDValue convertSelectOfFPConstantsToLoadOffset(
591 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
592 ISD::CondCode CC);
593 SDValue foldSignChangeInBitcast(SDNode *N);
594 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
595 SDValue N2, SDValue N3, ISD::CondCode CC);
596 SDValue foldSelectOfBinops(SDNode *N);
597 SDValue foldSextSetcc(SDNode *N);
598 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
599 const SDLoc &DL);
600 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
601 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
602 SDValue unfoldMaskedMerge(SDNode *N);
603 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
604 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
605 const SDLoc &DL, bool foldBooleans);
606 SDValue rebuildSetCC(SDValue N);
607
608 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
609 SDValue &CC, bool MatchStrict = false) const;
610 bool isOneUseSetCC(SDValue N) const;
611
612 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
613 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
614
615 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
616 unsigned HiOp);
617 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
618 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
619 const TargetLowering &TLI);
620
621 SDValue CombineExtLoad(SDNode *N);
622 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
623 SDValue combineRepeatedFPDivisors(SDNode *N);
624 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
625 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
626 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
628 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
629 SDValue BuildSDIV(SDNode *N);
630 SDValue BuildSDIVPow2(SDNode *N);
631 SDValue BuildUDIV(SDNode *N);
632 SDValue BuildSREMPow2(SDNode *N);
633 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
634 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
635 bool KnownNeverZero = false,
636 bool InexpensiveOnly = false,
637 std::optional<EVT> OutVT = std::nullopt);
638 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
639 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
640 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
642 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
643 SDNodeFlags Flags, bool Reciprocal);
644 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
645 SDNodeFlags Flags, bool Reciprocal);
646 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
647 bool DemandHighBits = true);
648 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
649 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
650 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
651 unsigned PosOpcode, unsigned NegOpcode,
652 const SDLoc &DL);
653 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
654 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
655 unsigned PosOpcode, unsigned NegOpcode,
656 const SDLoc &DL);
657 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
658 SDValue MatchLoadCombine(SDNode *N);
659 SDValue mergeTruncStores(StoreSDNode *N);
660 SDValue reduceLoadWidth(SDNode *N);
661 SDValue ReduceLoadOpStoreWidth(SDNode *N);
663 SDValue TransformFPLoadStorePair(SDNode *N);
664 SDValue convertBuildVecZextToZext(SDNode *N);
665 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
666 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
667 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
668 SDValue reduceBuildVecToShuffle(SDNode *N);
669 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
670 ArrayRef<int> VectorMask, SDValue VecIn1,
671 SDValue VecIn2, unsigned LeftIdx,
672 bool DidSplitVec);
673 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
674
675 /// Walk up chain skipping non-aliasing memory nodes,
676 /// looking for aliasing nodes and adding them to the Aliases vector.
677 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
678 SmallVectorImpl<SDValue> &Aliases);
679
680 /// Return true if there is any possibility that the two addresses overlap.
681 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
682
683 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
684 /// chain (aliasing node.)
685 SDValue FindBetterChain(SDNode *N, SDValue Chain);
686
687 /// Try to replace a store and any possibly adjacent stores on
688 /// consecutive chains with better chains. Return true only if St is
689 /// replaced.
690 ///
691 /// Notice that other chains may still be replaced even if the function
692 /// returns false.
693 bool findBetterNeighborChains(StoreSDNode *St);
694
695 // Helper for findBetterNeighborChains. Walk up the store chain and add
696 // additional chained stores that do not overlap and can be parallelized.
697 bool parallelizeChainedStores(StoreSDNode *St);
698
699 /// Holds a pointer to an LSBaseSDNode as well as information on where it
700 /// is located in a sequence of memory operations connected by a chain.
701 struct MemOpLink {
702 // Ptr to the mem node.
703 LSBaseSDNode *MemNode;
704
705 // Offset from the base ptr.
706 int64_t OffsetFromBase;
707
708 MemOpLink(LSBaseSDNode *N, int64_t Offset)
709 : MemNode(N), OffsetFromBase(Offset) {}
710 };
711
712 // Classify the origin of a stored value.
713 enum class StoreSource { Unknown, Constant, Extract, Load };
714 StoreSource getStoreSource(SDValue StoreVal) {
715 switch (StoreVal.getOpcode()) {
716 case ISD::Constant:
717 case ISD::ConstantFP:
718 return StoreSource::Constant;
719 case ISD::BUILD_VECTOR:
720 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
721 ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
722 return StoreSource::Constant;
723 return StoreSource::Unknown;
724 case ISD::EXTRACT_VECTOR_ELT:
725 case ISD::EXTRACT_SUBVECTOR:
726 return StoreSource::Extract;
727 case ISD::LOAD:
728 return StoreSource::Load;
729 default:
730 return StoreSource::Unknown;
731 }
732 }
733
734 /// This is a helper function for visitMUL to check the profitability
735 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
736 /// MulNode is the original multiply, AddNode is (add x, c1),
737 /// and ConstNode is c2.
738 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
739 SDValue ConstNode);
740
741 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
742 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
743 /// the type of the loaded value to be extended.
744 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
745 EVT LoadResultTy, EVT &ExtVT);
746
747 /// Helper function to calculate whether the given Load/Store can have its
748 /// width reduced to ExtVT.
749 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
750 EVT &MemVT, unsigned ShAmt = 0);
751
752 /// Used by BackwardsPropagateMask to find suitable loads.
753 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
754 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
755 ConstantSDNode *Mask, SDNode *&NodeToMask);
756 /// Attempt to propagate a given AND node back to load leaves so that they
757 /// can be combined into narrow loads.
758 bool BackwardsPropagateMask(SDNode *N);
759
760 /// Helper function for mergeConsecutiveStores which merges the component
761 /// store chains.
762 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
763 unsigned NumStores);
764
765 /// Helper function for mergeConsecutiveStores which checks if all the store
766 /// nodes have the same underlying object. We can still reuse the first
767 /// store's pointer info if all the stores are from the same object.
768 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
769
770 /// This is a helper function for mergeConsecutiveStores. When the source
771 /// elements of the consecutive stores are all constants or all extracted
772 /// vector elements, try to merge them into one larger store introducing
773 /// bitcasts if necessary. \return True if a merged store was created.
774 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
775 EVT MemVT, unsigned NumStores,
776 bool IsConstantSrc, bool UseVector,
777 bool UseTrunc);
778
779 /// This is a helper function for mergeConsecutiveStores. Stores that
780 /// potentially may be merged with St are placed in StoreNodes. RootNode is
781 /// a chain predecessor to all store candidates.
782 void getStoreMergeCandidates(StoreSDNode *St,
783 SmallVectorImpl<MemOpLink> &StoreNodes,
784 SDNode *&Root);
785
786 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
787 /// have indirect dependency through their operands. RootNode is the
788 /// predecessor to all stores calculated by getStoreMergeCandidates and is
789 /// used to prune the dependency check. \return True if safe to merge.
790 bool checkMergeStoreCandidatesForDependencies(
791 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
792 SDNode *RootNode);
793
794 /// This is a helper function for mergeConsecutiveStores. Given a list of
795 /// store candidates, find the first N that are consecutive in memory.
796 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
797 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
798 int64_t ElementSizeBytes) const;
799
800 /// This is a helper function for mergeConsecutiveStores. It is used for
801 /// store chains that are composed entirely of constant values.
802 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
803 unsigned NumConsecutiveStores,
804 EVT MemVT, SDNode *Root, bool AllowVectors);
805
806 /// This is a helper function for mergeConsecutiveStores. It is used for
807 /// store chains that are composed entirely of extracted vector elements.
808 /// When extracting multiple vector elements, try to store them in one
809 /// vector store rather than a sequence of scalar stores.
810 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
811 unsigned NumConsecutiveStores, EVT MemVT,
812 SDNode *Root);
813
814 /// This is a helper function for mergeConsecutiveStores. It is used for
815 /// store chains that are composed entirely of loaded values.
816 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
817 unsigned NumConsecutiveStores, EVT MemVT,
818 SDNode *Root, bool AllowVectors,
819 bool IsNonTemporalStore, bool IsNonTemporalLoad);
820
821 /// Merge consecutive store operations into a wide store.
822 /// This optimization uses wide integers or vectors when possible.
823 /// \return true if stores were merged.
824 bool mergeConsecutiveStores(StoreSDNode *St);
825
826 /// Try to transform a truncation where C is a constant:
827 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
828 ///
829 /// \p N needs to be a truncation and its first operand an AND. Other
830 /// requirements are checked by the function (e.g. that trunc is
831 /// single-use) and if missed an empty SDValue is returned.
832 SDValue distributeTruncateThroughAnd(SDNode *N);
833
834 /// Helper function to determine whether the target supports the operation
835 /// given by \p Opcode for type \p VT, that is, whether the operation
836 /// is legal or custom before legalizing operations, and whether it is
837 /// legal (but not custom) after legalization.
838 bool hasOperation(unsigned Opcode, EVT VT) {
839 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
840 }
841
842 public:
843 /// Runs the dag combiner on all nodes in the work list
844 void Run(CombineLevel AtLevel);
845
846 SelectionDAG &getDAG() const { return DAG; }
847
848 /// Returns a type large enough to hold any valid shift amount - before type
849 /// legalization these can be huge.
850 EVT getShiftAmountTy(EVT LHSTy) {
851 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
852 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
853 }
854
855 /// This method returns true if we are running before type legalization or
856 /// if the specified VT is legal.
857 bool isTypeLegal(const EVT &VT) {
858 if (!LegalTypes) return true;
859 return TLI.isTypeLegal(VT);
860 }
861
862 /// Convenience wrapper around TargetLowering::getSetCCResultType
863 EVT getSetCCResultType(EVT VT) const {
864 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
865 }
866
867 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
868 SDValue OrigLoad, SDValue ExtLoad,
869 ISD::NodeType ExtType);
870 };
871
872/// This class is a DAGUpdateListener that removes any deleted
873/// nodes from the worklist.
874class WorklistRemover : public SelectionDAG::DAGUpdateListener {
875 DAGCombiner &DC;
876
877public:
878 explicit WorklistRemover(DAGCombiner &dc)
879 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
880
881 void NodeDeleted(SDNode *N, SDNode *E) override {
882 DC.removeFromWorklist(N);
883 }
884};
885
886class WorklistInserter : public SelectionDAG::DAGUpdateListener {
887 DAGCombiner &DC;
888
889public:
890 explicit WorklistInserter(DAGCombiner &dc)
891 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
892
893 // FIXME: Ideally we could add N to the worklist, but this causes exponential
894 // compile time costs in large DAGs, e.g. Halide.
895 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
896};
897
898} // end anonymous namespace
899
900//===----------------------------------------------------------------------===//
901// TargetLowering::DAGCombinerInfo implementation
902//===----------------------------------------------------------------------===//
903
904void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
905 ((DAGCombiner*)DC)->AddToWorklist(N);
906}
907
908SDValue TargetLowering::DAGCombinerInfo::
909CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
910 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
911}
912
913SDValue TargetLowering::DAGCombinerInfo::
914CombineTo(SDNode *N, SDValue Res, bool AddTo) {
915 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
916}
917
918SDValue TargetLowering::DAGCombinerInfo::
919CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
920 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
921}
922
923bool TargetLowering::DAGCombinerInfo::
924recursivelyDeleteUnusedNodes(SDNode *N) {
925 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
926}
927
928void TargetLowering::DAGCombinerInfo::
929CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
930 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
931}
932
933//===----------------------------------------------------------------------===//
934// Helper Functions
935//===----------------------------------------------------------------------===//
936
937void DAGCombiner::deleteAndRecombine(SDNode *N) {
938 removeFromWorklist(N);
939
940 // If the operands of this node are only used by the node, they will now be
941 // dead. Make sure to re-visit them and recursively delete dead nodes.
942 for (const SDValue &Op : N->ops())
943 // For an operand generating multiple values, one of the values may
944 // become dead allowing further simplification (e.g. split index
945 // arithmetic from an indexed load).
946 if (Op->hasOneUse() || Op->getNumValues() > 1)
947 AddToWorklist(Op.getNode());
948
949 DAG.DeleteNode(N);
950}
951
952// APInts must be the same size for most operations; this helper
953// function zero-extends the shorter of the pair so that they match.
954// We provide an Offset so that we can create bitwidths that won't overflow.
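// For example, with an 8-bit LHS, a 16-bit RHS, and Offset == 1, both values
// are zero-extended to 17 bits (Offset + the wider of the two bit widths).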
955static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
956 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
957 LHS = LHS.zext(Bits);
958 RHS = RHS.zext(Bits);
959}
960
961// Return true if this node is a setcc, or is a select_cc
962// that selects between the target values used for true and false, making it
963// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
964// the appropriate nodes based on the type of node we are checking. This
965// simplifies life a bit for the callers.
966bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
967 SDValue &CC, bool MatchStrict) const {
968 if (N.getOpcode() == ISD::SETCC) {
969 LHS = N.getOperand(0);
970 RHS = N.getOperand(1);
971 CC = N.getOperand(2);
972 return true;
973 }
974
975 if (MatchStrict &&
976 (N.getOpcode() == ISD::STRICT_FSETCC ||
977 N.getOpcode() == ISD::STRICT_FSETCCS)) {
978 LHS = N.getOperand(1);
979 RHS = N.getOperand(2);
980 CC = N.getOperand(3);
981 return true;
982 }
983
984 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
985 !TLI.isConstFalseVal(N.getOperand(3)))
986 return false;
987
988 if (TLI.getBooleanContents(N.getValueType()) ==
989 TargetLowering::UndefinedBooleanContent)
990 return false;
991
992 LHS = N.getOperand(0);
993 RHS = N.getOperand(1);
994 CC = N.getOperand(4);
995 return true;
996}
997
998/// Return true if this is a SetCC-equivalent operation with only one use.
999/// If this is true, it allows the users to invert the operation for free when
1000/// it is profitable to do so.
1001bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1002 SDValue N0, N1, N2;
1003 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1004 return true;
1005 return false;
1006}
1007
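// Return true if N is a splat of the all-ones mask for the simple scalar type
// ScalarTy (i8, i16 or i32). For example, a splat build_vector of 0xFFFF
// matches when ScalarTy is MVT::i16, while a splat of 0x00FF does not.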
1008static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1009 if (!ScalarTy.isSimple())
1010 return false;
1011
1012 uint64_t MaskForTy = 0ULL;
1013 switch (ScalarTy.getSimpleVT().SimpleTy) {
1014 case MVT::i8:
1015 MaskForTy = 0xFFULL;
1016 break;
1017 case MVT::i16:
1018 MaskForTy = 0xFFFFULL;
1019 break;
1020 case MVT::i32:
1021 MaskForTy = 0xFFFFFFFFULL;
1022 break;
1023 default:
1024 return false;
1025 break;
1026 }
1027
1028 APInt Val;
1029 if (ISD::isConstantSplatVector(N, Val))
1030 return Val.getLimitedValue() == MaskForTy;
1031
1032 return false;
1033}
1034
1035// Determines if it is a constant integer or a splat/build vector of constant
1036// integers (and undefs).
1037// Do not permit build vector implicit truncation.
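// For example, (build_vector 0, 1, undef, 3) is accepted, while a build vector
// whose constant operands required implicit truncation to the element width is
// rejected.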
1038static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1039 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1040 return !(Const->isOpaque() && NoOpaques);
1041 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1042 return false;
1043 unsigned BitWidth = N.getScalarValueSizeInBits();
1044 for (const SDValue &Op : N->op_values()) {
1045 if (Op.isUndef())
1046 continue;
1047 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1048 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1049 (Const->isOpaque() && NoOpaques))
1050 return false;
1051 }
1052 return true;
1053}
1054
1055 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
1056 // with undefs.
1057static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1058 if (V.getOpcode() != ISD::BUILD_VECTOR)
1059 return false;
1060 return isConstantOrConstantVector(V, NoOpaques) ||
1061 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1062}
1063
1064 // Determine if this is an indexed load with an opaque target constant index.
1065static bool canSplitIdx(LoadSDNode *LD) {
1066 return MaySplitLoadIndex &&
1067 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1068 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1069}
1070
1071bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1072 const SDLoc &DL,
1073 SDNode *N,
1074 SDValue N0,
1075 SDValue N1) {
1076 // Currently this only tries to ensure we don't undo the GEP splits done by
1077 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1078 // we check if the following transformation would be problematic:
1079 // (load/store (add, (add, x, offset1), offset2)) ->
1080 // (load/store (add, x, offset1+offset2)).
1081
1082 // (load/store (add, (add, x, y), offset2)) ->
1083 // (load/store (add, (add, x, offset2), y)).
1084
1085 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1086 return false;
1087
1088 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1089 if (!C2)
1090 return false;
1091
1092 const APInt &C2APIntVal = C2->getAPIntValue();
1093 if (C2APIntVal.getSignificantBits() > 64)
1094 return false;
1095
1096 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1097 if (N0.hasOneUse())
1098 return false;
1099
1100 const APInt &C1APIntVal = C1->getAPIntValue();
1101 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1102 if (CombinedValueIntVal.getSignificantBits() > 64)
1103 return false;
1104 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1105
1106 for (SDNode *Node : N->uses()) {
1107 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1108 // Is x[offset2] already not a legal addressing mode? If so then
1109 // reassociating the constants breaks nothing (we test offset2 because
1110 // that's the one we hope to fold into the load or store).
1111 TargetLoweringBase::AddrMode AM;
1112 AM.HasBaseReg = true;
1113 AM.BaseOffs = C2APIntVal.getSExtValue();
1114 EVT VT = LoadStore->getMemoryVT();
1115 unsigned AS = LoadStore->getAddressSpace();
1116 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1117 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1118 continue;
1119
1120 // Would x[offset1+offset2] still be a legal addressing mode?
1121 AM.BaseOffs = CombinedValue;
1122 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1123 return true;
1124 }
1125 }
1126 } else {
1127 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1128 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1129 return false;
1130
1131 for (SDNode *Node : N->uses()) {
1132 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1133 if (!LoadStore)
1134 return false;
1135
1136 // Is x[offset2] a legal addressing mode? If so, then reassociating
1137 // the constants breaks the address pattern.
1138 TargetLoweringBase::AddrMode AM;
1139 AM.HasBaseReg = true;
1140 AM.BaseOffs = C2APIntVal.getSExtValue();
1141 EVT VT = LoadStore->getMemoryVT();
1142 unsigned AS = LoadStore->getAddressSpace();
1143 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1144 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1145 return false;
1146 }
1147 return true;
1148 }
1149
1150 return false;
1151}
1152
1153/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1154/// \p N0 is the same kind of operation as \p Opc.
1155SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1156 SDValue N0, SDValue N1,
1157 SDNodeFlags Flags) {
1158 EVT VT = N0.getValueType();
1159
1160 if (N0.getOpcode() != Opc)
1161 return SDValue();
1162
1163 SDValue N00 = N0.getOperand(0);
1164 SDValue N01 = N0.getOperand(1);
1165
1166 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1167 SDNodeFlags NewFlags;
1168 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1169 Flags.hasNoUnsignedWrap())
1170 NewFlags.setNoUnsignedWrap(true);
1171
1172 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1173 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1174 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1175 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1176 return SDValue();
1177 }
1178 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1179 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1180 // iff (op x, c1) has one use
1181 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1182 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1183 }
1184 }
1185
1186 // Check for repeated operand logic simplifications.
1187 if (Opc == ISD::AND || Opc == ISD::OR) {
1188 // (N00 & N01) & N00 --> N00 & N01
1189 // (N00 & N01) & N01 --> N00 & N01
1190 // (N00 | N01) | N00 --> N00 | N01
1191 // (N00 | N01) | N01 --> N00 | N01
1192 if (N1 == N00 || N1 == N01)
1193 return N0;
1194 }
1195 if (Opc == ISD::XOR) {
1196 // (N00 ^ N01) ^ N00 --> N01
1197 if (N1 == N00)
1198 return N01;
1199 // (N00 ^ N01) ^ N01 --> N00
1200 if (N1 == N01)
1201 return N00;
1202 }
1203
1204 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1205 if (N1 != N01) {
1206 // Reassociate if (op N00, N1) already exist
1207 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1208 // If (Op (Op N00, N1), N01) already exists, we need to stop
1209 // reassociating to avoid an infinite loop.
1210 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1211 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1212 }
1213 }
1214
1215 if (N1 != N00) {
1216 // Reassociate if (op N01, N1) already exist
1217 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1218 // If (Op (Op N01, N1), N00) already exists, we need to stop
1219 // reassociating to avoid an infinite loop.
1220 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1221 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1222 }
1223 }
1224
1225 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1226 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1227 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1228 // comparisons with the same predicate. This enables optimizations as the
1229 // following one:
1230 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1231 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1232 if (Opc == ISD::AND || Opc == ISD::OR) {
1233 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1234 N01->getOpcode() == ISD::SETCC) {
1235 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1236 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1237 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1238 if (CC1 == CC00 && CC1 != CC01) {
1239 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1240 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1241 }
1242 if (CC1 == CC01 && CC1 != CC00) {
1243 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1244 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1245 }
1246 }
1247 }
1248 }
1249
1250 return SDValue();
1251}
1252
1253/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1254/// same kind of operation as \p Opc.
1255SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1256 SDValue N1, SDNodeFlags Flags) {
1257 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1258
1259 // Floating-point reassociation is not allowed without loose FP math.
1260 if (N0.getValueType().isFloatingPoint() ||
1261 N1.getValueType().isFloatingPoint())
1262 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1263 return SDValue();
1264
1265 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1266 return Combined;
1267 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1268 return Combined;
1269 return SDValue();
1270}
1271
1272// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1273// Note that we only expect Flags to be passed from FP operations. For integer
1274// operations they need to be dropped.
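// For example: (fadd (vecreduce_fadd A), (vecreduce_fadd B))
//                -> (vecreduce_fadd (fadd A, B))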
1275SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1276 const SDLoc &DL, EVT VT, SDValue N0,
1277 SDValue N1, SDNodeFlags Flags) {
1278 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1279 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1280 N0->hasOneUse() && N1->hasOneUse() &&
1281 hasOperation(Opc, N0.getOperand(0).getValueType()) &&
1282 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1283 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1284 return DAG.getNode(RedOpc, DL, VT,
1285 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1286 N0.getOperand(0), N1.getOperand(0)));
1287 }
1288 return SDValue();
1289}
1290
1291SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1292 bool AddTo) {
1293 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1294 ++NodesCombined;
1295 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1296 To[0].dump(&DAG);
1297 dbgs() << " and " << NumTo - 1 << " other values\n");
1298 for (unsigned i = 0, e = NumTo; i != e; ++i)
1299 assert((!To[i].getNode() ||
1300 N->getValueType(i) == To[i].getValueType()) &&
1301 "Cannot combine value to value of different type!");
1302
1303 WorklistRemover DeadNodes(*this);
1304 DAG.ReplaceAllUsesWith(N, To);
1305 if (AddTo) {
1306 // Push the new nodes and any users onto the worklist
1307 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1308 if (To[i].getNode())
1309 AddToWorklistWithUsers(To[i].getNode());
1310 }
1311 }
1312
1313 // Finally, if the node is now dead, remove it from the graph. The node
1314 // may not be dead if the replacement process recursively simplified to
1315 // something else needing this node.
1316 if (N->use_empty())
1317 deleteAndRecombine(N);
1318 return SDValue(N, 0);
1319}
1320
1321void DAGCombiner::
1322CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1323 // Replace the old value with the new one.
1324 ++NodesCombined;
1325 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1326 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1327
1328 // Replace all uses.
1329 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1330
1331 // Push the new node and any (possibly new) users onto the worklist.
1332 AddToWorklistWithUsers(TLO.New.getNode());
1333
1334 // Finally, if the node is now dead, remove it from the graph.
1335 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1336}
1337
1338/// Check the specified integer node value to see if it can be simplified or if
1339/// things it uses can be simplified by bit propagation. If so, return true.
1340bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1341 const APInt &DemandedElts,
1342 bool AssumeSingleUse) {
1343 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1344 KnownBits Known;
1345 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1346 AssumeSingleUse))
1347 return false;
1348
1349 // Revisit the node.
1350 AddToWorklist(Op.getNode());
1351
1352 CommitTargetLoweringOpt(TLO);
1353 return true;
1354}
1355
1356/// Check the specified vector node value to see if it can be simplified or
1357/// if things it uses can be simplified as it only uses some of the elements.
1358/// If so, return true.
1359bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1360 const APInt &DemandedElts,
1361 bool AssumeSingleUse) {
1362 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1363 APInt KnownUndef, KnownZero;
1364 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1365 TLO, 0, AssumeSingleUse))
1366 return false;
1367
1368 // Revisit the node.
1369 AddToWorklist(Op.getNode());
1370
1371 CommitTargetLoweringOpt(TLO);
1372 return true;
1373}
1374
1375void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1376 SDLoc DL(Load);
1377 EVT VT = Load->getValueType(0);
1378 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1379
1380 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1381 Trunc.dump(&DAG); dbgs() << '\n');
1382
1383 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1384 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1385
1386 AddToWorklist(Trunc.getNode());
1387 recursivelyDeleteUnusedNodes(Load);
1388}
1389
1390SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1391 Replace = false;
1392 SDLoc DL(Op);
1393 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1394 LoadSDNode *LD = cast<LoadSDNode>(Op);
1395 EVT MemVT = LD->getMemoryVT();
1396 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1397 : LD->getExtensionType();
1398 Replace = true;
1399 return DAG.getExtLoad(ExtType, DL, PVT,
1400 LD->getChain(), LD->getBasePtr(),
1401 MemVT, LD->getMemOperand());
1402 }
1403
1404 unsigned Opc = Op.getOpcode();
1405 switch (Opc) {
1406 default: break;
1407 case ISD::AssertSext:
1408 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1409 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1410 break;
1411 case ISD::AssertZext:
1412 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1413 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1414 break;
1415 case ISD::Constant: {
1416 unsigned ExtOpc =
1417 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1418 return DAG.getNode(ExtOpc, DL, PVT, Op);
1419 }
1420 }
1421
1422 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1423 return SDValue();
1424 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1425}
1426
1427SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1428 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1429 return SDValue();
1430 EVT OldVT = Op.getValueType();
1431 SDLoc DL(Op);
1432 bool Replace = false;
1433 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1434 if (!NewOp.getNode())
1435 return SDValue();
1436 AddToWorklist(NewOp.getNode());
1437
1438 if (Replace)
1439 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1440 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1441 DAG.getValueType(OldVT));
1442}
1443
1444SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1445 EVT OldVT = Op.getValueType();
1446 SDLoc DL(Op);
1447 bool Replace = false;
1448 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1449 if (!NewOp.getNode())
1450 return SDValue();
1451 AddToWorklist(NewOp.getNode());
1452
1453 if (Replace)
1454 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1455 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1456}
1457
1458/// Promote the specified integer binary operation if the target indicates it is
1459/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1460/// i32 since i16 instructions are longer.
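/// For example, an i16 add may be rewritten as
///   (i16 (trunc (i32 add (i32 anyext x), (i32 anyext y))))
/// when the target reports i32 as the desirable type.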
1461SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1462 if (!LegalOperations)
1463 return SDValue();
1464
1465 EVT VT = Op.getValueType();
1466 if (VT.isVector() || !VT.isInteger())
1467 return SDValue();
1468
1469 // If operation type is 'undesirable', e.g. i16 on x86, consider
1470 // promoting it.
1471 unsigned Opc = Op.getOpcode();
1472 if (TLI.isTypeDesirableForOp(Opc, VT))
1473 return SDValue();
1474
1475 EVT PVT = VT;
1476 // Consult target whether it is a good idea to promote this operation and
1477 // what's the right type to promote it to.
1478 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1479 assert(PVT != VT && "Don't know what type to promote to!");
1480
1481 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1482
1483 bool Replace0 = false;
1484 SDValue N0 = Op.getOperand(0);
1485 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1486
1487 bool Replace1 = false;
1488 SDValue N1 = Op.getOperand(1);
1489 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1490 SDLoc DL(Op);
1491
1492 SDValue RV =
1493 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1494
1495 // We are always replacing N0/N1's use in N and only need additional
1496 // replacements if there are additional uses.
1497 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1498 // (SDValue) here because the node may reference multiple values
1499 // (for example, the chain value of a load node).
1500 Replace0 &= !N0->hasOneUse();
1501 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1502
1503 // Combine Op here so it is preserved past replacements.
1504 CombineTo(Op.getNode(), RV);
1505
1506 // If operands have a use ordering, make sure we deal with
1507 // predecessor first.
1508 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1509 std::swap(N0, N1);
1510 std::swap(NN0, NN1);
1511 }
1512
1513 if (Replace0) {
1514 AddToWorklist(NN0.getNode());
1515 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1516 }
1517 if (Replace1) {
1518 AddToWorklist(NN1.getNode());
1519 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1520 }
1521 return Op;
1522 }
1523 return SDValue();
1524}
1525
1526/// Promote the specified integer shift operation if the target indicates it is
1527/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1528/// i32 since i16 instructions are longer.
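/// The shifted operand is widened with an extension that preserves the shift's
/// semantics: sign-extension for SRA, zero-extension for SRL, and an
/// any-extension otherwise; the wide shift result is truncated back to VT.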
1529SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1530 if (!LegalOperations)
1531 return SDValue();
1532
1533 EVT VT = Op.getValueType();
1534 if (VT.isVector() || !VT.isInteger())
1535 return SDValue();
1536
1537 // If operation type is 'undesirable', e.g. i16 on x86, consider
1538 // promoting it.
1539 unsigned Opc = Op.getOpcode();
1540 if (TLI.isTypeDesirableForOp(Opc, VT))
1541 return SDValue();
1542
1543 EVT PVT = VT;
1544 // Consult target whether it is a good idea to promote this operation and
1545 // what's the right type to promote it to.
1546 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1547 assert(PVT != VT && "Don't know what type to promote to!");
1548
1549 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1550
1551 bool Replace = false;
1552 SDValue N0 = Op.getOperand(0);
1553 if (Opc == ISD::SRA)
1554 N0 = SExtPromoteOperand(N0, PVT);
1555 else if (Opc == ISD::SRL)
1556 N0 = ZExtPromoteOperand(N0, PVT);
1557 else
1558 N0 = PromoteOperand(N0, PVT, Replace);
1559
1560 if (!N0.getNode())
1561 return SDValue();
1562
1563 SDLoc DL(Op);
1564 SDValue N1 = Op.getOperand(1);
1565 SDValue RV =
1566 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1567
1568 if (Replace)
1569 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1570
1571 // Deal with Op being deleted.
1572 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1573 return RV;
1574 }
1575 return SDValue();
1576}
1577
1578SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1579 if (!LegalOperations)
1580 return SDValue();
1581
1582 EVT VT = Op.getValueType();
1583 if (VT.isVector() || !VT.isInteger())
1584 return SDValue();
1585
1586 // If operation type is 'undesirable', e.g. i16 on x86, consider
1587 // promoting it.
1588 unsigned Opc = Op.getOpcode();
1589 if (TLI.isTypeDesirableForOp(Opc, VT))
1590 return SDValue();
1591
1592 EVT PVT = VT;
1593 // Consult target whether it is a good idea to promote this operation and
1594 // what's the right type to promote it to.
1595 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1596 assert(PVT != VT && "Don't know what type to promote to!");
1597 // fold (aext (aext x)) -> (aext x)
1598 // fold (aext (zext x)) -> (zext x)
1599 // fold (aext (sext x)) -> (sext x)
1600 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1601 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1602 }
1603 return SDValue();
1604}
1605
1606bool DAGCombiner::PromoteLoad(SDValue Op) {
1607 if (!LegalOperations)
1608 return false;
1609
1610 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1611 return false;
1612
1613 EVT VT = Op.getValueType();
1614 if (VT.isVector() || !VT.isInteger())
1615 return false;
1616
1617 // If operation type is 'undesirable', e.g. i16 on x86, consider
1618 // promoting it.
1619 unsigned Opc = Op.getOpcode();
1620 if (TLI.isTypeDesirableForOp(Opc, VT))
1621 return false;
1622
1623 EVT PVT = VT;
1624 // Consult target whether it is a good idea to promote this operation and
1625 // what's the right type to promote it to.
1626 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1627 assert(PVT != VT && "Don't know what type to promote to!");
1628
1629 SDLoc DL(Op);
1630 SDNode *N = Op.getNode();
1631 LoadSDNode *LD = cast<LoadSDNode>(N);
1632 EVT MemVT = LD->getMemoryVT();
1633 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD
1634 : LD->getExtensionType();
1635 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1636 LD->getChain(), LD->getBasePtr(),
1637 MemVT, LD->getMemOperand());
1638 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1639
1640 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1641 Result.dump(&DAG); dbgs() << '\n');
1642
1643 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1644 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1645
1646 AddToWorklist(Result.getNode());
1647 recursivelyDeleteUnusedNodes(N);
1648 return true;
1649 }
1650
1651 return false;
1652}
1653
1654/// Recursively delete a node which has no uses and any operands for
1655/// which it is the only use.
1656///
1657/// Note that this both deletes the nodes and removes them from the worklist.
1658/// It also adds any nodes that have had a user deleted to the worklist, as
1659/// they may now have only one use and be subject to other combines.
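/// For example, when a dead (add (load p), c) is deleted and the add was the
/// load's only user, the load is deleted as well, and surviving operands such
/// as p are put back on the worklist so dependent combines can rerun.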
1660bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1661 if (!N->use_empty())
1662 return false;
1663
1664 SmallSetVector<SDNode *, 16> Nodes;
1665 Nodes.insert(N);
1666 do {
1667 N = Nodes.pop_back_val();
1668 if (!N)
1669 continue;
1670
1671 if (N->use_empty()) {
1672 for (const SDValue &ChildN : N->op_values())
1673 Nodes.insert(ChildN.getNode());
1674
1675 removeFromWorklist(N);
1676 DAG.DeleteNode(N);
1677 } else {
1678 AddToWorklist(N);
1679 }
1680 } while (!Nodes.empty());
1681 return true;
1682}
1683
1684//===----------------------------------------------------------------------===//
1685// Main DAG Combiner implementation
1686//===----------------------------------------------------------------------===//
1687
1688void DAGCombiner::Run(CombineLevel AtLevel) {
1690 // Set the instance variables, so that the various visit routines may use them.
1690 Level = AtLevel;
1691 LegalDAG = Level >= AfterLegalizeDAG;
1692 LegalOperations = Level >= AfterLegalizeVectorOps;
1693 LegalTypes = Level >= AfterLegalizeTypes;
1694
1695 WorklistInserter AddNodes(*this);
1696
1697 // Add all the dag nodes to the worklist.
1698 //
1699 // Note: Not all nodes are added to the PruningList here; this is because the
1700 // only nodes which can be deleted are those which have no uses, and all other
1701 // nodes which would otherwise be added to the worklist by the first call to
1702 // getNextWorklistEntry are already present in it.
1703 for (SDNode &Node : DAG.allnodes())
1704 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1705
1706 // Create a dummy node (which is not added to allnodes), that adds a reference
1707 // to the root node, preventing it from being deleted, and tracking any
1708 // changes of the root.
1709 HandleSDNode Dummy(DAG.getRoot());
1710
1711 // While we have a valid worklist entry node, try to combine it.
1712 while (SDNode *N = getNextWorklistEntry()) {
1713 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1714 // N is deleted from the DAG, since they too may now be dead or may have a
1715 // reduced number of uses, allowing other xforms.
1716 if (recursivelyDeleteUnusedNodes(N))
1717 continue;
1718
1719 WorklistRemover DeadNodes(*this);
1720
1721 // If this combine is running after legalizing the DAG, re-legalize any
1722 // nodes pulled off the worklist.
1723 if (LegalDAG) {
1724 SmallSetVector<SDNode *, 16> UpdatedNodes;
1725 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1726
1727 for (SDNode *LN : UpdatedNodes)
1728 AddToWorklistWithUsers(LN);
1729
1730 if (!NIsValid)
1731 continue;
1732 }
1733
1734 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1735
1736 // Add any operands of the new node which have not yet been combined to the
1737 // worklist as well. Because the worklist uniques things already, this
1738 // won't repeatedly process the same operand.
1739 for (const SDValue &ChildN : N->op_values())
1740 if (!CombinedNodes.count(ChildN.getNode()))
1741 AddToWorklist(ChildN.getNode());
1742
1743 CombinedNodes.insert(N);
1744 SDValue RV = combine(N);
1745
1746 if (!RV.getNode())
1747 continue;
1748
1749 ++NodesCombined;
1750
1751 // If we get back the same node we passed in, rather than a new node or
1752 // zero, we know that the node must have defined multiple values and
1753 // CombineTo was used. Since CombineTo takes care of the worklist
1754 // mechanics for us, we have no work to do in this case.
1755 if (RV.getNode() == N)
1756 continue;
1757
1758 assert(N->getOpcode() != ISD::DELETED_NODE &&
1759 RV.getOpcode() != ISD::DELETED_NODE &&
1760 "Node was deleted but visit returned new node!");
1761
1762 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1763
1764 if (N->getNumValues() == RV->getNumValues())
1765 DAG.ReplaceAllUsesWith(N, RV.getNode());
1766 else {
1767 assert(N->getValueType(0) == RV.getValueType() &&
1768 N->getNumValues() == 1 && "Type mismatch");
1769 DAG.ReplaceAllUsesWith(N, &RV);
1770 }
1771
1772 // Push the new node and any users onto the worklist. Omit this if the
1773 // new node is the EntryToken (e.g. if a store managed to get optimized
1774 // out), because re-visiting the EntryToken and its users will not uncover
1775 // any additional opportunities, but there may be a large number of such
1776 // users, potentially causing compile time explosion.
1777 if (RV.getOpcode() != ISD::EntryToken)
1778 AddToWorklistWithUsers(RV.getNode());
1779
1780 // Finally, if the node is now dead, remove it from the graph. The node
1781 // may not be dead if the replacement process recursively simplified to
1782 // something else needing this node. This will also take care of adding any
1783 // operands which have lost a user to the worklist.
1784 recursivelyDeleteUnusedNodes(N);
1785 }
1786
1787 // If the root changed (e.g. it was a dead load), update the root.
1788 DAG.setRoot(Dummy.getValue());
1789 DAG.RemoveDeadNodes();
1790}
1791
1792SDValue DAGCombiner::visit(SDNode *N) {
1793 // clang-format off
1794 switch (N->getOpcode()) {
1795 default: break;
1796 case ISD::TokenFactor: return visitTokenFactor(N);
1797 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1798 case ISD::ADD: return visitADD(N);
1799 case ISD::SUB: return visitSUB(N);
1800 case ISD::SADDSAT:
1801 case ISD::UADDSAT: return visitADDSAT(N);
1802 case ISD::SSUBSAT:
1803 case ISD::USUBSAT: return visitSUBSAT(N);
1804 case ISD::ADDC: return visitADDC(N);
1805 case ISD::SADDO:
1806 case ISD::UADDO: return visitADDO(N);
1807 case ISD::SUBC: return visitSUBC(N);
1808 case ISD::SSUBO:
1809 case ISD::USUBO: return visitSUBO(N);
1810 case ISD::ADDE: return visitADDE(N);
1811 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1812 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1813 case ISD::SUBE: return visitSUBE(N);
1814 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1815 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1816 case ISD::SMULFIX:
1817 case ISD::SMULFIXSAT:
1818 case ISD::UMULFIX:
1819 case ISD::UMULFIXSAT: return visitMULFIX(N);
1820 case ISD::MUL: return visitMUL(N);
1821 case ISD::SDIV: return visitSDIV(N);
1822 case ISD::UDIV: return visitUDIV(N);
1823 case ISD::SREM:
1824 case ISD::UREM: return visitREM(N);
1825 case ISD::MULHU: return visitMULHU(N);
1826 case ISD::MULHS: return visitMULHS(N);
1827 case ISD::AVGFLOORS:
1828 case ISD::AVGFLOORU:
1829 case ISD::AVGCEILS:
1830 case ISD::AVGCEILU: return visitAVG(N);
1831 case ISD::ABDS:
1832 case ISD::ABDU: return visitABD(N);
1833 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1834 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1835 case ISD::SMULO:
1836 case ISD::UMULO: return visitMULO(N);
1837 case ISD::SMIN:
1838 case ISD::SMAX:
1839 case ISD::UMIN:
1840 case ISD::UMAX: return visitIMINMAX(N);
1841 case ISD::AND: return visitAND(N);
1842 case ISD::OR: return visitOR(N);
1843 case ISD::XOR: return visitXOR(N);
1844 case ISD::SHL: return visitSHL(N);
1845 case ISD::SRA: return visitSRA(N);
1846 case ISD::SRL: return visitSRL(N);
1847 case ISD::ROTR:
1848 case ISD::ROTL: return visitRotate(N);
1849 case ISD::FSHL:
1850 case ISD::FSHR: return visitFunnelShift(N);
1851 case ISD::SSHLSAT:
1852 case ISD::USHLSAT: return visitSHLSAT(N);
1853 case ISD::ABS: return visitABS(N);
1854 case ISD::BSWAP: return visitBSWAP(N);
1855 case ISD::BITREVERSE: return visitBITREVERSE(N);
1856 case ISD::CTLZ: return visitCTLZ(N);
1857 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1858 case ISD::CTTZ: return visitCTTZ(N);
1859 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1860 case ISD::CTPOP: return visitCTPOP(N);
1861 case ISD::SELECT: return visitSELECT(N);
1862 case ISD::VSELECT: return visitVSELECT(N);
1863 case ISD::SELECT_CC: return visitSELECT_CC(N);
1864 case ISD::SETCC: return visitSETCC(N);
1865 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1866 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1867 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1868 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1869 case ISD::AssertSext:
1870 case ISD::AssertZext: return visitAssertExt(N);
1871 case ISD::AssertAlign: return visitAssertAlign(N);
1872 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1873 case ISD::SIGN_EXTEND_VECTOR_INREG:
1874 case ISD::ZERO_EXTEND_VECTOR_INREG:
1875 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1876 case ISD::TRUNCATE: return visitTRUNCATE(N);
1877 case ISD::BITCAST: return visitBITCAST(N);
1878 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1879 case ISD::FADD: return visitFADD(N);
1880 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1881 case ISD::FSUB: return visitFSUB(N);
1882 case ISD::FMUL: return visitFMUL(N);
1883 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1884 case ISD::FMAD: return visitFMAD(N);
1885 case ISD::FDIV: return visitFDIV(N);
1886 case ISD::FREM: return visitFREM(N);
1887 case ISD::FSQRT: return visitFSQRT(N);
1888 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1889 case ISD::FPOW: return visitFPOW(N);
1890 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1891 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1892 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1893 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1894 case ISD::LRINT:
1895 case ISD::LLRINT: return visitXRINT(N);
1896 case ISD::FP_ROUND: return visitFP_ROUND(N);
1897 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1898 case ISD::FNEG: return visitFNEG(N);
1899 case ISD::FABS: return visitFABS(N);
1900 case ISD::FFLOOR: return visitFFLOOR(N);
1901 case ISD::FMINNUM:
1902 case ISD::FMAXNUM:
1903 case ISD::FMINIMUM:
1904 case ISD::FMAXIMUM: return visitFMinMax(N);
1905 case ISD::FCEIL: return visitFCEIL(N);
1906 case ISD::FTRUNC: return visitFTRUNC(N);
1907 case ISD::FFREXP: return visitFFREXP(N);
1908 case ISD::BRCOND: return visitBRCOND(N);
1909 case ISD::BR_CC: return visitBR_CC(N);
1910 case ISD::LOAD: return visitLOAD(N);
1911 case ISD::STORE: return visitSTORE(N);
1912 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1913 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1914 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1915 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1916 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1917 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1918 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1919 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1920 case ISD::MGATHER: return visitMGATHER(N);
1921 case ISD::MLOAD: return visitMLOAD(N);
1922 case ISD::MSCATTER: return visitMSCATTER(N);
1923 case ISD::MSTORE: return visitMSTORE(N);
1924 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1925 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1926 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1927 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1928 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1929 case ISD::FREEZE: return visitFREEZE(N);
1930 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1931 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1932 case ISD::VECREDUCE_FADD:
1933 case ISD::VECREDUCE_FMUL:
1934 case ISD::VECREDUCE_ADD:
1935 case ISD::VECREDUCE_MUL:
1936 case ISD::VECREDUCE_AND:
1937 case ISD::VECREDUCE_OR:
1938 case ISD::VECREDUCE_XOR:
1939 case ISD::VECREDUCE_SMAX:
1940 case ISD::VECREDUCE_SMIN:
1941 case ISD::VECREDUCE_UMAX:
1942 case ISD::VECREDUCE_UMIN:
1943 case ISD::VECREDUCE_FMAX:
1944 case ISD::VECREDUCE_FMIN:
1945 case ISD::VECREDUCE_FMAXIMUM:
1946 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1947#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1948#include "llvm/IR/VPIntrinsics.def"
1949 return visitVPOp(N);
1950 }
1951 // clang-format on
1952 return SDValue();
1953}
1954
1955SDValue DAGCombiner::combine(SDNode *N) {
1956 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1957 return SDValue();
1958
1959 SDValue RV;
1960 if (!DisableGenericCombines)
1961 RV = visit(N);
1962
1963 // If nothing happened, try a target-specific DAG combine.
1964 if (!RV.getNode()) {
1965 assert(N->getOpcode() != ISD::DELETED_NODE &&
1966 "Node was deleted but visit returned NULL!");
1967
1968 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1969 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1970
1971 // Expose the DAG combiner to the target combiner impls.
1972 TargetLowering::DAGCombinerInfo
1973 DagCombineInfo(DAG, Level, false, this);
1974
1975 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1976 }
1977 }
1978
1979 // If nothing happened still, try promoting the operation.
1980 if (!RV.getNode()) {
1981 switch (N->getOpcode()) {
1982 default: break;
1983 case ISD::ADD:
1984 case ISD::SUB:
1985 case ISD::MUL:
1986 case ISD::AND:
1987 case ISD::OR:
1988 case ISD::XOR:
1989 RV = PromoteIntBinOp(SDValue(N, 0));
1990 break;
1991 case ISD::SHL:
1992 case ISD::SRA:
1993 case ISD::SRL:
1994 RV = PromoteIntShiftOp(SDValue(N, 0));
1995 break;
1996 case ISD::SIGN_EXTEND:
1997 case ISD::ZERO_EXTEND:
1998 case ISD::ANY_EXTEND:
1999 RV = PromoteExtend(SDValue(N, 0));
2000 break;
2001 case ISD::LOAD:
2002 if (PromoteLoad(SDValue(N, 0)))
2003 RV = SDValue(N, 0);
2004 break;
2005 }
2006 }
2007
2008 // If N is a commutative binary node, try to eliminate it if the commuted
2009 // version is already present in the DAG.
2010 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2011 SDValue N0 = N->getOperand(0);
2012 SDValue N1 = N->getOperand(1);
2013
2014 // Constant operands are canonicalized to RHS.
2015 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2016 SDValue Ops[] = {N1, N0};
2017 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2018 N->getFlags());
2019 if (CSENode)
2020 return SDValue(CSENode, 0);
2021 }
2022 }
2023
2024 return RV;
2025}
2026
2027/// Given a node, return its input chain if it has one, otherwise return a null
2028/// sd operand.
2029static SDValue getInputChainForNode(SDNode *N) {
2030 if (unsigned NumOps = N->getNumOperands()) {
2031 if (N->getOperand(0).getValueType() == MVT::Other)
2032 return N->getOperand(0);
2033 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2034 return N->getOperand(NumOps-1);
2035 for (unsigned i = 1; i < NumOps-1; ++i)
2036 if (N->getOperand(i).getValueType() == MVT::Other)
2037 return N->getOperand(i);
2038 }
2039 return SDValue();
2040}
2041
2042SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2043 // If N has two operands, where one has an input chain equal to the other,
2044 // the 'other' chain is redundant.
2045 if (N->getNumOperands() == 2) {
2046 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2047 return N->getOperand(0);
2048 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2049 return N->getOperand(1);
2050 }
2051
2052 // Don't simplify token factors if optnone.
2053 if (OptLevel == CodeGenOptLevel::None)
2054 return SDValue();
2055
2056 // Don't simplify the token factor if the node itself has too many operands.
2057 if (N->getNumOperands() > TokenFactorInlineLimit)
2058 return SDValue();
2059
2060 // If the sole user is a token factor, we should make sure we have a
2061 // chance to merge them together. This prevents TF chains from inhibiting
2062 // optimizations.
2063 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2064 AddToWorklist(*(N->use_begin()));
2065
2066 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2067 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2068 SmallPtrSet<SDNode *, 16> SeenOps;
2069 bool Changed = false; // If we should replace this token factor.
2070
2071 // Start out with this token factor.
2072 TFs.push_back(N);
2073
2074 // Iterate through token factors. The TFs list grows when new token factors
2075 // are encountered.
2076 for (unsigned i = 0; i < TFs.size(); ++i) {
2077 // Limit number of nodes to inline, to avoid quadratic compile times.
2078 // We have to add the outstanding Token Factors to Ops, otherwise we might
2079 // drop Ops from the resulting Token Factors.
2080 if (Ops.size() > TokenFactorInlineLimit) {
2081 for (unsigned j = i; j < TFs.size(); j++)
2082 Ops.emplace_back(TFs[j], 0);
2083 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2084 // combiner worklist later.
2085 TFs.resize(i);
2086 break;
2087 }
2088
2089 SDNode *TF = TFs[i];
2090 // Check each of the operands.
2091 for (const SDValue &Op : TF->op_values()) {
2092 switch (Op.getOpcode()) {
2093 case ISD::EntryToken:
2094 // Entry tokens don't need to be added to the list. They are
2095 // redundant.
2096 Changed = true;
2097 break;
2098
2099 case ISD::TokenFactor:
2100 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2101 // Queue up for processing.
2102 TFs.push_back(Op.getNode());
2103 Changed = true;
2104 break;
2105 }
2106 [[fallthrough]];
2107
2108 default:
2109 // Only add if it isn't already in the list.
2110 if (SeenOps.insert(Op.getNode()).second)
2111 Ops.push_back(Op);
2112 else
2113 Changed = true;
2114 break;
2115 }
2116 }
2117 }
2118
2119 // Re-visit inlined Token Factors, to clean them up in case they have been
2120 // removed. Skip the first Token Factor, as this is the current node.
2121 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2122 AddToWorklist(TFs[i]);
2123
2124 // Remove Nodes that are chained to another node in the list. Do so
2125 // by walking up chains breadth-first, stopping when we've seen
2126 // another operand. In general we must climb to the EntryNode, but we can exit
2127 // early if we find all remaining work is associated with just one operand as
2128 // no further pruning is possible.
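 // For example, if Ops contains a load L and also a store S whose chain passes
 // through L, then L is redundant: S already depends on L, so only S needs to
 // remain in the rebuilt token factor.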
2129
2130 // List of nodes to search through and original Ops from which they originate.
2131 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2132 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2133 SmallPtrSet<SDNode *, 16> SeenChains;
2134 bool DidPruneOps = false;
2135
2136 unsigned NumLeftToConsider = 0;
2137 for (const SDValue &Op : Ops) {
2138 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2139 OpWorkCount.push_back(1);
2140 }
2141
2142 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2143 // If this is an Op, we can remove the op from the list. Re-mark any
2144 // search associated with it as coming from the current OpNumber.
2145 if (SeenOps.contains(Op)) {
2146 Changed = true;
2147 DidPruneOps = true;
2148 unsigned OrigOpNumber = 0;
2149 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2150 OrigOpNumber++;
2151 assert((OrigOpNumber != Ops.size()) &&
2152 "expected to find TokenFactor Operand");
2153 // Re-mark worklist from OrigOpNumber to OpNumber
2154 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2155 if (Worklist[i].second == OrigOpNumber) {
2156 Worklist[i].second = OpNumber;
2157 }
2158 }
2159 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2160 OpWorkCount[OrigOpNumber] = 0;
2161 NumLeftToConsider--;
2162 }
2163 // Add if it's a new chain
2164 if (SeenChains.insert(Op).second) {
2165 OpWorkCount[OpNumber]++;
2166 Worklist.push_back(std::make_pair(Op, OpNumber));
2167 }
2168 };
2169
2170 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2171 // We need to consider at least 2 Ops to prune.
2172 if (NumLeftToConsider <= 1)
2173 break;
2174 auto CurNode = Worklist[i].first;
2175 auto CurOpNumber = Worklist[i].second;
2176 assert((OpWorkCount[CurOpNumber] > 0) &&
2177 "Node should not appear in worklist");
2178 switch (CurNode->getOpcode()) {
2179 case ISD::EntryToken:
2180 // Hitting EntryToken is the only way for the search to terminate without
2181 // hitting another operand's search. Prevent us from marking this operand
2182 // considered.
2184 NumLeftToConsider++;
2185 break;
2186 case ISD::TokenFactor:
2187 for (const SDValue &Op : CurNode->op_values())
2188 AddToWorklist(i, Op.getNode(), CurOpNumber);
2189 break;
2190 case ISD::LIFETIME_START:
2191 case ISD::LIFETIME_END:
2192 case ISD::CopyFromReg:
2193 case ISD::CopyToReg:
2194 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2195 break;
2196 default:
2197 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2198 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2199 break;
2200 }
2201 OpWorkCount[CurOpNumber]--;
2202 if (OpWorkCount[CurOpNumber] == 0)
2203 NumLeftToConsider--;
2204 }
2205
2206 // If we've changed things around then replace token factor.
2207 if (Changed) {
2208 SDValue Result;
2209 if (Ops.empty()) {
2210 // The entry token is the only possible outcome.
2211 Result = DAG.getEntryNode();
2212 } else {
2213 if (DidPruneOps) {
2214 SmallVector<SDValue, 8> PrunedOps;
2215 //
2216 for (const SDValue &Op : Ops) {
2217 if (SeenChains.count(Op.getNode()) == 0)
2218 PrunedOps.push_back(Op);
2219 }
2220 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2221 } else {
2222 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2223 }
2224 }
2225 return Result;
2226 }
2227 return SDValue();
2228}
2229
2230/// MERGE_VALUES can always be eliminated.
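/// Each result value of the MERGE_VALUES node is replaced directly with the
/// corresponding operand, e.g. value 1 of (merge_values a, b) becomes b.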
2231SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2232 WorklistRemover DeadNodes(*this);
2233 // Replacing results may cause a different MERGE_VALUES to suddenly
2234 // be CSE'd with N, and carry its uses with it. Iterate until no
2235 // uses remain, to ensure that the node can be safely deleted.
2236 // First add the users of this node to the work list so that they
2237 // can be tried again once they have new operands.
2238 AddUsersToWorklist(N);
2239 do {
2240 // Do as a single replacement to avoid rewalking use lists.
2241 SmallVector<SDValue, 8> Ops;
2242 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2243 Ops.push_back(N->getOperand(i));
2244 DAG.ReplaceAllUsesWith(N, Ops.data());
2245 } while (!N->use_empty());
2246 deleteAndRecombine(N);
2247 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2248}
2249
2250/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2251/// ConstantSDNode pointer else nullptr.
2252static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2253 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2254 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2255}
2256
2257// isTruncateOf - If N is a truncate of some other value, return true, record
2258// the value being truncated in Op and which of Op's bits are zero/one in Known.
2259// This function computes KnownBits to avoid a duplicated call to
2260// computeKnownBits in the caller.
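// Besides plain TRUNCATE nodes, (setcc X, 0, setne) is also treated as a
// truncate to i1 when X is known to be either 0 or 1.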
2261static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2262 KnownBits &Known) {
2263 if (N->getOpcode() == ISD::TRUNCATE) {
2264 Op = N->getOperand(0);
2265 Known = DAG.computeKnownBits(Op);
2266 return true;
2267 }
2268
2269 if (N.getOpcode() != ISD::SETCC ||
2270 N.getValueType().getScalarType() != MVT::i1 ||
2271 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2272 return false;
2273
2274 SDValue Op0 = N->getOperand(0);
2275 SDValue Op1 = N->getOperand(1);
2276 assert(Op0.getValueType() == Op1.getValueType());
2277
2278 if (isNullOrNullSplat(Op0))
2279 Op = Op1;
2280 else if (isNullOrNullSplat(Op1))
2281 Op = Op0;
2282 else
2283 return false;
2284
2285 Known = DAG.computeKnownBits(Op);
2286
2287 return (Known.Zero | 1).isAllOnes();
2288}
2289
2290/// Return true if 'Use' is a load or a store that uses N as its base pointer
2291/// and that N may be folded in the load / store addressing mode.
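/// For example, (add BasePtr, 16) used as the address of a load can typically
/// be folded into the load as a [reg + imm] access, provided the target
/// reports that addressing mode as legal for the memory type and address space.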
2292static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2293 const TargetLowering &TLI) {
2294 EVT VT;
2295 unsigned AS;
2296
2297 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2298 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2299 return false;
2300 VT = LD->getMemoryVT();
2301 AS = LD->getAddressSpace();
2302 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2303 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2304 return false;
2305 VT = ST->getMemoryVT();
2306 AS = ST->getAddressSpace();
2307 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2308 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2309 return false;
2310 VT = LD->getMemoryVT();
2311 AS = LD->getAddressSpace();
2312 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2313 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2314 return false;
2315 VT = ST->getMemoryVT();
2316 AS = ST->getAddressSpace();
2317 } else {
2318 return false;
2319 }
2320
2321 TargetLowering::AddrMode AM;
2322 if (N->getOpcode() == ISD::ADD) {
2323 AM.HasBaseReg = true;
2324 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2325 if (Offset)
2326 // [reg +/- imm]
2327 AM.BaseOffs = Offset->getSExtValue();
2328 else
2329 // [reg +/- reg]
2330 AM.Scale = 1;
2331 } else if (N->getOpcode() == ISD::SUB) {
2332 AM.HasBaseReg = true;
2333 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2334 if (Offset)
2335 // [reg +/- imm]
2336 AM.BaseOffs = -Offset->getSExtValue();
2337 else
2338 // [reg +/- reg]
2339 AM.Scale = 1;
2340 } else {
2341 return false;
2342 }
2343
2344 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2345 VT.getTypeForEVT(*DAG.getContext()), AS);
2346}
2347
2348/// This inverts a canonicalization in IR that replaces a variable select arm
2349/// with an identity constant. Codegen improves if we re-use the variable
2350/// operand rather than load a constant. This can also be converted into a
2351/// masked vector operation if the target supports it.
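/// For example:
///   add X, (vselect Cond, 0, Y) --> vselect Cond, X, (add X, Y)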
2352static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2353 bool ShouldCommuteOperands) {
2354 // Match a select as operand 1. The identity constant that we are looking for
2355 // is only valid as operand 1 of a non-commutative binop.
2356 SDValue N0 = N->getOperand(0);
2357 SDValue N1 = N->getOperand(1);
2358 if (ShouldCommuteOperands)
2359 std::swap(N0, N1);
2360
2361 // TODO: Should this apply to scalar select too?
2362 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2363 return SDValue();
2364
2365 // We can't hoist all instructions because of immediate UB (not speculatable).
2366 // For example div/rem by zero.
2367 if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2368 return SDValue();
2369
2370 unsigned Opcode = N->getOpcode();
2371 EVT VT = N->getValueType(0);
2372 SDValue Cond = N1.getOperand(0);
2373 SDValue TVal = N1.getOperand(1);
2374 SDValue FVal = N1.getOperand(2);
2375
2376 // This transform increases uses of N0, so freeze it to be safe.
2377 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2378 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2379 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2380 SDValue F0 = DAG.getFreeze(N0);
2381 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2382 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2383 }
2384 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2385 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2386 SDValue F0 = DAG.getFreeze(N0);
2387 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2388 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2389 }
2390
2391 return SDValue();
2392}
2393
2394SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2395 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2396 "Unexpected binary operator");
2397
2398 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2399 auto BinOpcode = BO->getOpcode();
2400 EVT VT = BO->getValueType(0);
2401 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2402 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2403 return Sel;
2404
2405 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2406 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2407 return Sel;
2408 }
2409
2410 // Don't do this unless the old select is going away. We want to eliminate the
2411 // binary operator, not replace a binop with a select.
2412 // TODO: Handle ISD::SELECT_CC.
2413 unsigned SelOpNo = 0;
2414 SDValue Sel = BO->getOperand(0);
2415 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2416 SelOpNo = 1;
2417 Sel = BO->getOperand(1);
2418
2419 // Peek through trunc to shift amount type.
2420 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2421 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2422 // This is valid when the truncated bits of x are already zero.
2423 SDValue Op;
2424 KnownBits Known;
2425 if (isTruncateOf(DAG, Sel, Op, Known) &&
2426 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2427 Sel = Op;
2428 }
2429 }
2430
2431 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2432 return SDValue();
2433
2434 SDValue CT = Sel.getOperand(1);
2435 if (!isConstantOrConstantVector(CT, true) &&
2436 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2437 return SDValue();
2438
2439 SDValue CF = Sel.getOperand(2);
2440 if (!isConstantOrConstantVector(CF, true) &&
2441 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2442 return SDValue();
2443
2444 // Bail out if any constants are opaque because we can't constant fold those.
2445 // The exception is "and" and "or" with either 0 or -1 in which case we can
2446 // propagate non constant operands into select. I.e.:
2447 // and (select Cond, 0, -1), X --> select Cond, 0, X
2448 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2449 bool CanFoldNonConst =
2450 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2451 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2452 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2453
2454 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2455 if (!CanFoldNonConst &&
2456 !isConstantOrConstantVector(CBO, true) &&
2457 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2458 return SDValue();
2459
2460 SDLoc DL(Sel);
2461 SDValue NewCT, NewCF;
2462
2463 if (CanFoldNonConst) {
2464 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2465 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2466 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2467 NewCT = CT;
2468 else
2469 NewCT = CBO;
2470
2471 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2472 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2473 NewCF = CF;
2474 else
2475 NewCF = CBO;
2476 } else {
2477 // We have a select-of-constants followed by a binary operator with a
2478 // constant. Eliminate the binop by pulling the constant math into the
2479 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2480 // CBO, CF + CBO
2481 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2482 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2483 if (!NewCT)
2484 return SDValue();
2485
2486 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2487 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2488 if (!NewCF)
2489 return SDValue();
2490 }
2491
2492 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2493 SelectOp->setFlags(BO->getFlags());
2494 return SelectOp;
2495}
2496
2497static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2498 SelectionDAG &DAG) {
2499 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2500 "Expecting add or sub");
2501
2502 // Match a constant operand and a zext operand for the math instruction:
2503 // add Z, C
2504 // sub C, Z
2505 bool IsAdd = N->getOpcode() == ISD::ADD;
2506 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2507 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2508 auto *CN = dyn_cast<ConstantSDNode>(C);
2509 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2510 return SDValue();
2511
2512 // Match the zext operand as a setcc of a boolean.
2513 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2514 Z.getOperand(0).getValueType() != MVT::i1)
2515 return SDValue();
2516
2517 // Match the compare as: setcc (X & 1), 0, eq.
2518 SDValue SetCC = Z.getOperand(0);
2519 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2520 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2521 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2522 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2523 return SDValue();
2524
2525 // We are adding/subtracting a constant and an inverted low bit. Turn that
2526 // into a subtract/add of the low bit with incremented/decremented constant:
2527 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2528 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2529 EVT VT = C.getValueType();
2530 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2531 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2532 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2533 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2534}
2535
2536// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2537SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2538 SDValue N0 = N->getOperand(0);
2539 EVT VT = N0.getValueType();
2540 SDValue A, B;
2541
2542 if (hasOperation(ISD::AVGCEILU, VT) &&
2543 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2544 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2545 m_SpecificInt(1))))) {
2546 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2547 }
2548 if (hasOperation(ISD::AVGCEILS, VT) &&
2549 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2550 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2551 m_SpecificInt(1))))) {
2552 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2553 }
2554 return SDValue();
2555}
2556
2557/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2558/// a shift and add with a different constant.
2559static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2560 SelectionDAG &DAG) {
2561 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2562 "Expecting add or sub");
2563
2564 // We need a constant operand for the add/sub, and the other operand is a
2565 // logical shift right: add (srl), C or sub C, (srl).
2566 bool IsAdd = N->getOpcode() == ISD::ADD;
2567 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2568 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2569 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2570 ShiftOp.getOpcode() != ISD::SRL)
2571 return SDValue();
2572
2573 // The shift must be of a 'not' value.
2574 SDValue Not = ShiftOp.getOperand(0);
2575 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2576 return SDValue();
2577
2578 // The shift must be moving the sign bit to the least-significant-bit.
2579 EVT VT = ShiftOp.getValueType();
2580 SDValue ShAmt = ShiftOp.getOperand(1);
2581 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2582 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2583 return SDValue();
2584
2585 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2586 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2587 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2588 if (SDValue NewC = DAG.FoldConstantArithmetic(
2589 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2590 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2591 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2592 Not.getOperand(0), ShAmt);
2593 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2594 }
2595
2596 return SDValue();
2597}
2598
2599static bool
2600areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2601 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2602 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2603}
2604
2605/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2606/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2607/// are no common bits set in the operands).
2608SDValue DAGCombiner::visitADDLike(SDNode *N) {
2609 SDValue N0 = N->getOperand(0);
2610 SDValue N1 = N->getOperand(1);
2611 EVT VT = N0.getValueType();
2612 SDLoc DL(N);
2613
2614 // fold (add x, undef) -> undef
2615 if (N0.isUndef())
2616 return N0;
2617 if (N1.isUndef())
2618 return N1;
2619
2620 // fold (add c1, c2) -> c1+c2
2621 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2622 return C;
2623
2624 // canonicalize constant to RHS
2625 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2626 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2627 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2628
2629 if (areBitwiseNotOfEachother(N0, N1))
2630 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2631 SDLoc(N), VT);
2632
2633 // fold vector ops
2634 if (VT.isVector()) {
2635 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2636 return FoldedVOp;
2637
2638 // fold (add x, 0) -> x, vector edition
2639 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2640 return N0;
2641 }
2642
2643 // fold (add x, 0) -> x
2644 if (isNullConstant(N1))
2645 return N0;
2646
2647 if (N0.getOpcode() == ISD::SUB) {
2648 SDValue N00 = N0.getOperand(0);
2649 SDValue N01 = N0.getOperand(1);
2650
2651 // fold ((A-c1)+c2) -> (A+(c2-c1))
2652 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2653 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2654
2655 // fold ((c1-A)+c2) -> (c1+c2)-A
2656 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2657 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2658 }
2659
2660 // add (sext i1 X), 1 -> zext (not i1 X)
2661 // We don't transform this pattern:
2662 // add (zext i1 X), -1 -> sext (not i1 X)
2663 // because most (?) targets generate better code for the zext form.
2664 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2665 isOneOrOneSplat(N1)) {
2666 SDValue X = N0.getOperand(0);
2667 if ((!LegalOperations ||
2668 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2669 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2670 X.getScalarValueSizeInBits() == 1) {
2671 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2672 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2673 }
2674 }
2675
2676 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2677 // iff (or x, c0) is equivalent to (add x, c0).
2678 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2679 // iff (xor x, c0) is equivalent to (add x, c0).
2680 if (DAG.isADDLike(N0)) {
2681 SDValue N01 = N0.getOperand(1);
2682 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2683 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2684 }
2685
2686 if (SDValue NewSel = foldBinOpIntoSelect(N))
2687 return NewSel;
2688
2689 // reassociate add
2690 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2691 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2692 return RADD;
2693
2694 // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2695 // equivalent to (add x, c).
2696 // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
2697 // equivalent to (add x, c).
2698 // Do this optimization only when adding c does not introduce instructions
2699 // for adding carries.
2700 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2701 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2702 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2703 // If N0's type does not split or is a sign mask, it does not introduce
2704 // add carry.
2705 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2706 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2707 TyActn == TargetLoweringBase::TypePromoteInteger ||
2708 isMinSignedConstant(N0.getOperand(1));
2709 if (NoAddCarry)
2710 return DAG.getNode(
2711 ISD::ADD, DL, VT,
2712 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2713 N0.getOperand(1));
2714 }
2715 return SDValue();
2716 };
2717 if (SDValue Add = ReassociateAddOr(N0, N1))
2718 return Add;
2719 if (SDValue Add = ReassociateAddOr(N1, N0))
2720 return Add;
2721
2722 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2723 if (SDValue SD =
2724 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2725 return SD;
2726 }
2727
2728 SDValue A, B, C;
2729
2730 // fold ((0-A) + B) -> B-A
2731 if (sd_match(N0, m_Neg(m_Value(A))))
2732 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2733
2734 // fold (A + (0-B)) -> A-B
2735 if (sd_match(N1, m_Neg(m_Value(B))))
2736 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2737
2738 // fold (A+(B-A)) -> B
2739 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2740 return B;
2741
2742 // fold ((B-A)+A) -> B
2743 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2744 return B;
2745
2746 // fold ((A-B)+(C-A)) -> (C-B)
2747 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2748 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2749 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2750
2751 // fold ((A-B)+(B-C)) -> (A-C)
2752 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2753 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2754 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2755
2756 // fold (A+(B-(A+C))) to (B-C)
2757 // fold (A+(B-(C+A))) to (B-C)
2758 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2759 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2760
2761 // fold (A+((B-A)+or-C)) to (B+or-C)
2762 if (sd_match(N1,
2763 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2764 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2765 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2766
2767 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2768 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2769 N0->hasOneUse() && N1->hasOneUse()) {
2770 SDValue N00 = N0.getOperand(0);
2771 SDValue N01 = N0.getOperand(1);
2772 SDValue N10 = N1.getOperand(0);
2773 SDValue N11 = N1.getOperand(1);
2774
2775 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2776 return DAG.getNode(ISD::SUB, DL, VT,
2777 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2778 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2779 }
2780
2781 // fold (add (umax X, C), -C) --> (usubsat X, C)
2782 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2783 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2784 return (!Max && !Op) ||
2785 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2786 };
2787 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2788 /*AllowUndefs*/ true))
2789 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2790 N0.getOperand(1));
2791 }
2792
2793 if (SimplifyDemandedBits(SDValue(N, 0)))
2794 return SDValue(N, 0);
2795
2796 if (isOneOrOneSplat(N1)) {
2797 // fold (add (xor a, -1), 1) -> (sub 0, a)
2798 if (isBitwiseNot(N0))
2799 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2800 N0.getOperand(0));
2801
2802 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2803 if (N0.getOpcode() == ISD::ADD) {
2804 SDValue A, Xor;
2805
2806 if (isBitwiseNot(N0.getOperand(0))) {
2807 A = N0.getOperand(1);
2808 Xor = N0.getOperand(0);
2809 } else if (isBitwiseNot(N0.getOperand(1))) {
2810 A = N0.getOperand(0);
2811 Xor = N0.getOperand(1);
2812 }
2813
2814 if (Xor)
2815 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2816 }
2817
2818 // Look for:
2819 // add (add x, y), 1
2820 // And if the target does not like this form then turn into:
2821 // sub y, (xor x, -1)
2822 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2823 N0.hasOneUse() &&
2824 // Limit this to after legalization if the add has wrap flags
2825 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2826 !N->getFlags().hasNoSignedWrap()))) {
2827 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2828 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2829 }
2830 }
2831
2832 // (x - y) + -1 -> add (xor y, -1), x
2833 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2834 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2835 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2836 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2837 }
2838
2839 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2840 return Combined;
2841
2842 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2843 return Combined;
2844
2845 return SDValue();
2846}
2847
2848// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2849SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2850 SDValue N0 = N->getOperand(0);
2851 EVT VT = N0.getValueType();
2852 SDValue A, B;
2853
2854 if (hasOperation(ISD::AVGFLOORU, VT) &&
2855 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2856 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2857 m_SpecificInt(1))))) {
2858 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2859 }
2860 if (hasOperation(ISD::AVGFLOORS, VT) &&
2861 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2862 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2863 m_SpecificInt(1))))) {
2864 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2865 }
2866
2867 return SDValue();
2868}
2869
2870SDValue DAGCombiner::visitADD(SDNode *N) {
2871 SDValue N0 = N->getOperand(0);
2872 SDValue N1 = N->getOperand(1);
2873 EVT VT = N0.getValueType();
2874 SDLoc DL(N);
2875
2876 if (SDValue Combined = visitADDLike(N))
2877 return Combined;
2878
2879 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2880 return V;
2881
2882 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2883 return V;
2884
2885 // Try to match AVGFLOOR fixedwidth pattern
2886 if (SDValue V = foldAddToAvg(N, DL))
2887 return V;
2888
2889 // fold (a+b) -> (a|b) iff a and b share no bits.
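 // e.g. (add (shl x, 8), (and y, 255)) has no common bits set, so it can be
 // rewritten as an OR carrying the 'disjoint' flag.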
2890 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2891 DAG.haveNoCommonBitsSet(N0, N1)) {
2892 SDNodeFlags Flags;
2893 Flags.setDisjoint(true);
2894 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2895 }
2896
2897 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2898 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2899 const APInt &C0 = N0->getConstantOperandAPInt(0);
2900 const APInt &C1 = N1->getConstantOperandAPInt(0);
2901 return DAG.getVScale(DL, VT, C0 + C1);
2902 }
2903
2904 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2905 if (N0.getOpcode() == ISD::ADD &&
2906 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2907 N1.getOpcode() == ISD::VSCALE) {
2908 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2909 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2910 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2911 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2912 }
2913
2914 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2915 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2916 N1.getOpcode() == ISD::STEP_VECTOR) {
2917 const APInt &C0 = N0->getConstantOperandAPInt(0);
2918 const APInt &C1 = N1->getConstantOperandAPInt(0);
2919 APInt NewStep = C0 + C1;
2920 return DAG.getStepVector(DL, VT, NewStep);
2921 }
2922
2923 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2924 if (N0.getOpcode() == ISD::ADD &&
2925 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
2926 N1.getOpcode() == ISD::STEP_VECTOR) {
2927 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2928 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2929 APInt NewStep = SV0 + SV1;
2930 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2931 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2932 }
2933
2934 return SDValue();
2935}
2936
2937SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2938 unsigned Opcode = N->getOpcode();
2939 SDValue N0 = N->getOperand(0);
2940 SDValue N1 = N->getOperand(1);
2941 EVT VT = N0.getValueType();
2942 bool IsSigned = Opcode == ISD::SADDSAT;
2943 SDLoc DL(N);
2944
2945 // fold (add_sat x, undef) -> -1
2946 if (N0.isUndef() || N1.isUndef())
2947 return DAG.getAllOnesConstant(DL, VT);
2948
2949 // fold (add_sat c1, c2) -> c3
2950 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2951 return C;
2952
2953 // canonicalize constant to RHS
2954 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2955 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2956 return DAG.getNode(Opcode, DL, VT, N1, N0);
2957
2958 // fold vector ops
2959 if (VT.isVector()) {
2960 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2961 return FoldedVOp;
2962
2963 // fold (add_sat x, 0) -> x, vector edition
2964 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2965 return N0;
2966 }
2967
2968 // fold (add_sat x, 0) -> x
2969 if (isNullConstant(N1))
2970 return N0;
2971
2972 // If it cannot overflow, transform into an add.
2973 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
2974 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2975
2976 return SDValue();
2977}
2978
2979static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
2980 bool ForceCarryReconstruction = false) {
2981 bool Masked = false;
2982
2983 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2984 while (true) {
2985 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2986 V = V.getOperand(0);
2987 continue;
2988 }
2989
2990 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2991 if (ForceCarryReconstruction)
2992 return V;
2993
2994 Masked = true;
2995 V = V.getOperand(0);
2996 continue;
2997 }
2998
2999 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3000 return V;
3001
3002 break;
3003 }
3004
3005 // If this is not a carry, return.
3006 if (V.getResNo() != 1)
3007 return SDValue();
3008
3009 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3010 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3011 return SDValue();
3012
3013 EVT VT = V->getValueType(0);
3014 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3015 return SDValue();
3016
3017 // If the result is masked, then no matter what kind of bool it is we can
3018 // return. If it isn't, then we need to make sure the bool type is either 0 or
3019 // 1 and not other values.
3020 if (Masked ||
3021 TLI.getBooleanContents(V.getValueType()) ==
3022 TargetLowering::ZeroOrOneBooleanContent)
3023 return V;
3024
3025 return SDValue();
3026}
3027
3028/// Given the operands of an add/sub operation, see if the 2nd operand is a
3029/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3030/// the opcode and bypass the mask operation.
3031static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3032 SelectionDAG &DAG, const SDLoc &DL) {
3033 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3034 N1 = N1.getOperand(0);
3035
3036 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3037 return SDValue();
3038
3039 EVT VT = N0.getValueType();
3040 SDValue N10 = N1.getOperand(0);
3041 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3042 N10 = N10.getOperand(0);
3043
3044 if (N10.getValueType() != VT)
3045 return SDValue();
3046
3047 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3048 return SDValue();
3049
3050 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3051 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3052 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3053}
3054
3055/// Helper for doing combines based on N0 and N1 being added to each other.
3056SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3057 SDNode *LocReference) {
3058 EVT VT = N0.getValueType();
3059 SDLoc DL(LocReference);
3060
3061 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3062 SDValue Y, N;
3063 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3064 return DAG.getNode(ISD::SUB, DL, VT, N0,
3065 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3066
3067 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3068 return V;
3069
3070 // Look for:
3071 // add (add x, 1), y
3072 // And if the target does not like this form then turn into:
3073 // sub y, (xor x, -1)
3074 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3075 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3076 // Limit this to after legalization if the add has wrap flags
3077 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3078 !N0->getFlags().hasNoSignedWrap()))) {
3079 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3080 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3081 }
3082
3083 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3084 // Hoist one-use subtraction by non-opaque constant:
3085 // (x - C) + y -> (x + y) - C
3086 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3087 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3088 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3089 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3090 }
3091 // Hoist one-use subtraction from non-opaque constant:
3092 // (C - x) + y -> (y - x) + C
3093 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3094 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3095 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3096 }
3097 }
3098
3099 // add (mul x, C), x -> mul x, C+1
3100 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3101 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3102 N0.hasOneUse()) {
3103 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3104 DAG.getConstant(1, DL, VT));
3105 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3106 }
3107
3108 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3109 // rather than 'add 0/-1' (the zext should get folded).
3110 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3111 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3112 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3113 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3114 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3115 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3116 }
3117
3118 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3119 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3120 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3121 if (TN->getVT() == MVT::i1) {
3122 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3123 DAG.getConstant(1, DL, VT));
3124 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3125 }
3126 }
3127
3128 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3129 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3130 N1.getResNo() == 0)
3131 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3132 N0, N1.getOperand(0), N1.getOperand(2));
3133
3134 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3135 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3136 if (SDValue Carry = getAsCarry(TLI, N1))
3137 return DAG.getNode(ISD::UADDO_CARRY, DL,
3138 DAG.getVTList(VT, Carry.getValueType()), N0,
3139 DAG.getConstant(0, DL, VT), Carry);
3140
3141 return SDValue();
3142}
3143
3144SDValue DAGCombiner::visitADDC(SDNode *N) {
3145 SDValue N0 = N->getOperand(0);
3146 SDValue N1 = N->getOperand(1);
3147 EVT VT = N0.getValueType();
3148 SDLoc DL(N);
3149
3150 // If the flag result is dead, turn this into an ADD.
3151 if (!N->hasAnyUseOfValue(1))
3152 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3153 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3154
3155 // canonicalize constant to RHS.
3156 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3157 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3158 if (N0C && !N1C)
3159 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3160
3161 // fold (addc x, 0) -> x + no carry out
3162 if (isNullConstant(N1))
3163 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3164 DL, MVT::Glue));
3165
3166 // If it cannot overflow, transform into an add.
3168 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3169 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3170
3171 return SDValue();
3172}
3173
3174/**
3175 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3176 * then the flip also occurs if computing the inverse is the same cost.
3177 * This function returns an empty SDValue in case it cannot flip the boolean
3178 * without increasing the cost of the computation. If you want to flip a boolean
3179 * no matter what, use DAG.getLogicalNOT.
3180 */
3181static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3182 const TargetLowering &TLI,
3183 bool Force) {
3184 if (Force && isa<ConstantSDNode>(V))
3185 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3186
3187 if (V.getOpcode() != ISD::XOR)
3188 return SDValue();
3189
3190 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3191 if (!Const)
3192 return SDValue();
3193
3194 EVT VT = V.getValueType();
3195
3196 bool IsFlip = false;
3197 switch(TLI.getBooleanContents(VT)) {
3198 case TargetLowering::ZeroOrOneBooleanContent:
3199 IsFlip = Const->isOne();
3200 break;
3201 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3202 IsFlip = Const->isAllOnes();
3203 break;
3204 case TargetLowering::UndefinedBooleanContent:
3205 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3206 break;
3207 }
3208
3209 if (IsFlip)
3210 return V.getOperand(0);
3211 if (Force)
3212 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3213 return SDValue();
3214}
3215
3216SDValue DAGCombiner::visitADDO(SDNode *N) {
3217 SDValue N0 = N->getOperand(0);
3218 SDValue N1 = N->getOperand(1);
3219 EVT VT = N0.getValueType();
3220 bool IsSigned = (ISD::SADDO == N->getOpcode());
3221
3222 EVT CarryVT = N->getValueType(1);
3223 SDLoc DL(N);
3224
3225 // If the flag result is dead, turn this into an ADD.
3226 if (!N->hasAnyUseOfValue(1))
3227 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3228 DAG.getUNDEF(CarryVT));
3229
3230 // canonicalize constant to RHS.
3231 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3232 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3233 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3234
3235 // fold (addo x, 0) -> x + no carry out
3236 if (isNullOrNullSplat(N1))
3237 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3238
3239 // If it cannot overflow, transform into an add.
3240 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3241 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3242 DAG.getConstant(0, DL, CarryVT));
3243
3244 if (IsSigned) {
3245 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3246 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3247 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3248 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3249 } else {
3250 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
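// ((xor a, -1) + 1 == 0 - a; the add carries out only when a == 0, while the
// sub borrows only when a != 0, so the carry result must be flipped.)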
3251 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3252 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3253 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3254 return CombineTo(
3255 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3256 }
3257
3258 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3259 return Combined;
3260
3261 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3262 return Combined;
3263 }
3264
3265 return SDValue();
3266}
3267
3268SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3269 EVT VT = N0.getValueType();
3270 if (VT.isVector())
3271 return SDValue();
3272
3273 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3274 // If Y + 1 cannot overflow.
3275 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3276 SDValue Y = N1.getOperand(0);
3277 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3279 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3280 N1.getOperand(2));
3281 }
3282
3283 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3284 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3285 if (SDValue Carry = getAsCarry(TLI, N1))
3286 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3287 DAG.getConstant(0, SDLoc(N), VT), Carry);
3288
3289 return SDValue();
3290}
3291
3292SDValue DAGCombiner::visitADDE(SDNode *N) {
3293 SDValue N0 = N->getOperand(0);
3294 SDValue N1 = N->getOperand(1);
3295 SDValue CarryIn = N->getOperand(2);
3296
3297 // canonicalize constant to RHS
3298 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3299 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3300 if (N0C && !N1C)
3301 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3302 N1, N0, CarryIn);
3303
3304 // fold (adde x, y, false) -> (addc x, y)
3305 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3306 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3307
3308 return SDValue();
3309}
3310
3311SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3312 SDValue N0 = N->getOperand(0);
3313 SDValue N1 = N->getOperand(1);
3314 SDValue CarryIn = N->getOperand(2);
3315 SDLoc DL(N);
3316
3317 // canonicalize constant to RHS
3318 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3319 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3320 if (N0C && !N1C)
3321 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3322
3323 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3324 if (isNullConstant(CarryIn)) {
3325 if (!LegalOperations ||
3326 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3327 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3328 }
3329
3330 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
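// (0 + 0 + carry can never overflow, so the carry-out is known to be zero and
// the sum is simply the value of the incoming carry bit.)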
3331 if (isNullConstant(N0) && isNullConstant(N1)) {
3332 EVT VT = N0.getValueType();
3333 EVT CarryVT = CarryIn.getValueType();
3334 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3335 AddToWorklist(CarryExt.getNode());
3336 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3337 DAG.getConstant(1, DL, VT)),
3338 DAG.getConstant(0, DL, CarryVT));
3339 }
3340
3341 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3342 return Combined;
3343
3344 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3345 return Combined;
3346
3347 // We want to avoid useless duplication.
3348 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
3349 // not a binary operation, it is not really possible to leverage this
3350 // existing mechanism for it. However, if more operations require the same
3351 // deduplication logic, then it may be worth generalizing it.
3352 SDValue Ops[] = {N1, N0, CarryIn};
3353 SDNode *CSENode =
3354 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3355 if (CSENode)
3356 return SDValue(CSENode, 0);
3357
3358 return SDValue();
3359}
3360
3361/**
3362 * If we are facing some sort of diamond carry propagation pattern try to
3363 * break it up to generate something like:
3364 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3365 *
3366 * The end result is usually an increase in the number of operations required, but because the
3367 * carry is now linearized, other transforms can kick in and optimize the DAG.
3368 *
3369 * Patterns typically look something like
3370 * (uaddo A, B)
3371 * / \
3372 * Carry Sum
3373 * | \
3374 * | (uaddo_carry *, 0, Z)
3375 * | /
3376 * \ Carry
3377 * | /
3378 * (uaddo_carry X, *, *)
3379 *
3380 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3381 * produce a combine with a single path for carry propagation.
3382 */
3383static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3384 SelectionDAG &DAG, SDValue X,
3385 SDValue Carry0, SDValue Carry1,
3386 SDNode *N) {
3387 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3388 return SDValue();
3389 if (Carry1.getOpcode() != ISD::UADDO)
3390 return SDValue();
3391
3392 SDValue Z;
3393
3394 /**
3395 * First look for a suitable Z. It will present itself in the form of
3396 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3397 */
3398 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3399 isNullConstant(Carry0.getOperand(1))) {
3400 Z = Carry0.getOperand(2);
3401 } else if (Carry0.getOpcode() == ISD::UADDO &&
3402 isOneConstant(Carry0.getOperand(1))) {
3403 EVT VT = Carry0->getValueType(1);
3404 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3405 } else {
3406 // We couldn't find a suitable Z.
3407 return SDValue();
3408 }
3409
3410
3411 auto cancelDiamond = [&](SDValue A,SDValue B) {
3412 SDLoc DL(N);
3413 SDValue NewY =
3414 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3415 Combiner.AddToWorklist(NewY.getNode());
3416 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3417 DAG.getConstant(0, DL, X.getValueType()),
3418 NewY.getValue(1));
3419 };
3420
3421 /**
3422 * (uaddo A, B)
3423 * |
3424 * Sum
3425 * |
3426 * (uaddo_carry *, 0, Z)
3427 */
3428 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3429 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3430 }
3431
3432 /**
3433 * (uaddo_carry A, 0, Z)
3434 * |
3435 * Sum
3436 * |
3437 * (uaddo *, B)
3438 */
3439 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3440 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3441 }
3442
3443 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3444 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3445 }
3446
3447 return SDValue();
3448}
3449
3450// If we are facing some sort of diamond carry/borrow in/out pattern try to
3451// match patterns like:
3452//
3453// (uaddo A, B) CarryIn
3454// | \ |
3455// | \ |
3456// PartialSum PartialCarryOutX /
3457// | | /
3458// | ____|____________/
3459// | / |
3460// (uaddo *, *) \________
3461// | \ \
3462// | \ |
3463// | PartialCarryOutY |
3464// | \ |
3465// | \ /
3466// AddCarrySum | ______/
3467// | /
3468// CarryOut = (or *, *)
3469//
3470// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3471//
3472// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3473//
3474// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3475// with a single path for carry/borrow out propagation.
3477 SDValue N0, SDValue N1, SDNode *N) {
3478 SDValue Carry0 = getAsCarry(TLI, N0);
3479 if (!Carry0)
3480 return SDValue();
3481 SDValue Carry1 = getAsCarry(TLI, N1);
3482 if (!Carry1)
3483 return SDValue();
3484
3485 unsigned Opcode = Carry0.getOpcode();
3486 if (Opcode != Carry1.getOpcode())
3487 return SDValue();
3488 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3489 return SDValue();
3490 // Guarantee identical type of CarryOut
3491 EVT CarryOutType = N->getValueType(0);
3492 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3493 CarryOutType != Carry1.getValue(1).getValueType())
3494 return SDValue();
3495
3496 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3497 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3498 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3499 std::swap(Carry0, Carry1);
3500
3501 // Check if nodes are connected in expected way.
3502 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3503 Carry1.getOperand(1) != Carry0.getValue(0))
3504 return SDValue();
3505
3506 // The carry-in value must be on the right-hand side for subtraction.
3507 unsigned CarryInOperandNum =
3508 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3509 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3510 return SDValue();
3511 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3512
3513 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3514 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3515 return SDValue();
3516
3517 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3518 CarryIn = getAsCarry(TLI, CarryIn, true);
3519 if (!CarryIn)
3520 return SDValue();
3521
3522 SDLoc DL(N);
3523 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3524 Carry1->getValueType(0));
3525 SDValue Merged =
3526 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3527 Carry0.getOperand(1), CarryIn);
3528
3529 // Note that because we have proven that the result of the UADDO/USUBO of A
3530 // and B feeds into the UADDO/USUBO that consumes the carry/borrow in, the
3531 // second UADDO/USUBO cannot overflow if the first one does. For example,
3532 // consider 8-bit numbers where 0xFF is the
3533 // maximum value.
3534 //
3535 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3536 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3537 //
3538 // This is important because it means that OR and XOR can be used to merge
3539 // carry flags; and that AND can return a constant zero.
3540 //
3541 // TODO: match other operations that can merge flags (ADD, etc)
3542 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3543 if (N->getOpcode() == ISD::AND)
3544 return DAG.getConstant(0, DL, CarryOutType);
3545 return Merged.getValue(1);
3546}
3547
3548SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3549 SDValue CarryIn, SDNode *N) {
3550 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3551 // carry.
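// ((xor a, -1) + b + c == b - a - (1 - c), i.e. a subtraction with borrow-in
// !c, and the carry-out of the add is the complement of the borrow-out.)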
3552 if (isBitwiseNot(N0))
3553 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3554 SDLoc DL(N);
3555 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3556 N0.getOperand(0), NotC);
3557 return CombineTo(
3558 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3559 }
3560
3561 // Iff the flag result is dead:
3562 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3563 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3564 // or the dependency between the instructions.
3565 if ((N0.getOpcode() == ISD::ADD ||
3566 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3567 N0.getValue(1) != CarryIn)) &&
3568 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3569 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3570 N0.getOperand(0), N0.getOperand(1), CarryIn);
3571
3572 /**
3573 * When one of the uaddo_carry arguments is itself a carry, we may be facing
3574 * a diamond carry propagation, in which case we try to transform the DAG
3575 * to ensure linear carry propagation if that is possible.
3576 */
3577 if (auto Y = getAsCarry(TLI, N1)) {
3578 // Because both are carries, Y and Z can be swapped.
3579 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3580 return R;
3581 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3582 return R;
3583 }
3584
3585 return SDValue();
3586}
3587
3588SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3589 SDValue CarryIn, SDNode *N) {
3590 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3591 if (isBitwiseNot(N0)) {
3592 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3593 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3594 N0.getOperand(0), NotC);
3595 }
3596
3597 return SDValue();
3598}
3599
3600SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3601 SDValue N0 = N->getOperand(0);
3602 SDValue N1 = N->getOperand(1);
3603 SDValue CarryIn = N->getOperand(2);
3604 SDLoc DL(N);
3605
3606 // canonicalize constant to RHS
3607 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3608 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3609 if (N0C && !N1C)
3610 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3611
3612 // fold (saddo_carry x, y, false) -> (saddo x, y)
3613 if (isNullConstant(CarryIn)) {
3614 if (!LegalOperations ||
3615 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3616 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3617 }
3618
3619 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3620 return Combined;
3621
3622 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3623 return Combined;
3624
3625 return SDValue();
3626}
3627
3628// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3629// clamp/truncation if necessary.
3630static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3631 SDValue RHS, SelectionDAG &DAG,
3632 const SDLoc &DL) {
3633 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3634 "Illegal truncation");
3635
3636 if (DstVT == SrcVT)
3637 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3638
3639 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3640 // clamping RHS.
3641 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3642 DstVT.getScalarSizeInBits());
3643 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3644 return SDValue();
3645
3646 SDValue SatLimit =
3647 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3648 DstVT.getScalarSizeInBits()),
3649 DL, SrcVT);
3650 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3651 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3652 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3653 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3654}
3655
3656// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3657// usubsat(a,b), optionally as a truncated type.
3658SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3659 if (N->getOpcode() != ISD::SUB ||
3660 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3661 return SDValue();
3662
3663 EVT SubVT = N->getValueType(0);
3664 SDValue Op0 = N->getOperand(0);
3665 SDValue Op1 = N->getOperand(1);
3666
3667 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3668 // that may be converted to usubsat(a,b).
3669 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3670 SDValue MaxLHS = Op0.getOperand(0);
3671 SDValue MaxRHS = Op0.getOperand(1);
3672 if (MaxLHS == Op1)
3673 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3674 if (MaxRHS == Op1)
3675 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3676 }
3677
3678 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3679 SDValue MinLHS = Op1.getOperand(0);
3680 SDValue MinRHS = Op1.getOperand(1);
3681 if (MinLHS == Op0)
3682 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3683 if (MinRHS == Op0)
3684 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3685 }
3686
3687 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3688 if (Op1.getOpcode() == ISD::TRUNCATE &&
3689 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3690 Op1.getOperand(0).hasOneUse()) {
3691 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3692 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3693 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3694 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3695 DAG, DL);
3696 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3697 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3698 DAG, DL);
3699 }
3700
3701 return SDValue();
3702}
3703
3704 // Since it may not be valid to emit a fold to zero for vector initializers,
3705 // check whether we can before folding.
3706static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3707 SelectionDAG &DAG, bool LegalOperations) {
3708 if (!VT.isVector())
3709 return DAG.getConstant(0, DL, VT);
3710 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3711 return DAG.getConstant(0, DL, VT);
3712 return SDValue();
3713}
3714
3715SDValue DAGCombiner::visitSUB(SDNode *N) {
3716 SDValue N0 = N->getOperand(0);
3717 SDValue N1 = N->getOperand(1);
3718 EVT VT = N0.getValueType();
3719 unsigned BitWidth = VT.getScalarSizeInBits();
3720 SDLoc DL(N);
3721
3722 auto PeekThroughFreeze = [](SDValue N) {
3723 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3724 return N->getOperand(0);
3725 return N;
3726 };
3727
3728 // fold (sub x, x) -> 0
3729 // FIXME: Refactor this and xor and other similar operations together.
3730 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3731 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3732
3733 // fold (sub c1, c2) -> c3
3734 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3735 return C;
3736
3737 // fold vector ops
3738 if (VT.isVector()) {
3739 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3740 return FoldedVOp;
3741
3742 // fold (sub x, 0) -> x, vector edition
3743 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3744 return N0;
3745 }
3746
3747 if (SDValue NewSel = foldBinOpIntoSelect(N))
3748 return NewSel;
3749
3750 // fold (sub x, c) -> (add x, -c)
3751 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
3752 return DAG.getNode(ISD::ADD, DL, VT, N0,
3753 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3754
3755 if (isNullOrNullSplat(N0)) {
3756 // Right-shifting everything out but the sign bit followed by negation is
3757 // the same as flipping arithmetic/logical shift type without the negation:
3758 // -(X >>u 31) -> (X >>s 31)
3759 // -(X >>s 31) -> (X >>u 31)
3760 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3761 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3762 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3763 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3764 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3765 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3766 }
3767 }
3768
3769 // 0 - X --> 0 if the sub is NUW.
3770 if (N->getFlags().hasNoUnsignedWrap())
3771 return N0;
3772
3773 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3774 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3775 // N1 must be 0 because negating the minimum signed value is undefined.
3776 if (N->getFlags().hasNoSignedWrap())
3777 return N0;
3778
3779 // 0 - X --> X if X is 0 or the minimum signed value.
3780 return N1;
3781 }
3782
3783 // Convert 0 - abs(x).
3784 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3785 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3786 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3787 return Result;
3788
3789 // Fold neg(splat(neg(x))) -> splat(x)
3790 if (VT.isVector()) {
3791 SDValue N1S = DAG.getSplatValue(N1, true);
3792 if (N1S && N1S.getOpcode() == ISD::SUB &&
3793 isNullConstant(N1S.getOperand(0)))
3794 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3795 }
3796 }
3797
3798 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3799 if (isAllOnesOrAllOnesSplat(N0))
3800 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3801
3802 // fold (A - (0-B)) -> A+B
3803 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3804 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3805
3806 // fold A-(A-B) -> B
3807 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3808 return N1.getOperand(1);
3809
3810 // fold (A+B)-A -> B
3811 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3812 return N0.getOperand(1);
3813
3814 // fold (A+B)-B -> A
3815 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3816 return N0.getOperand(0);
3817
3818 // fold (A+C1)-C2 -> A+(C1-C2)
3819 if (N0.getOpcode() == ISD::ADD) {
3820 SDValue N01 = N0.getOperand(1);
3821 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3822 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3823 }
3824
3825 // fold C2-(A+C1) -> (C2-C1)-A
3826 if (N1.getOpcode() == ISD::ADD) {
3827 SDValue N11 = N1.getOperand(1);
3828 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3829 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3830 }
3831
3832 // fold (A-C1)-C2 -> A-(C1+C2)
3833 if (N0.getOpcode() == ISD::SUB) {
3834 SDValue N01 = N0.getOperand(1);
3835 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3836 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3837 }
3838
3839 // fold (c1-A)-c2 -> (c1-c2)-A
3840 if (N0.getOpcode() == ISD::SUB) {
3841 SDValue N00 = N0.getOperand(0);
3842 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3843 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3844 }
3845
3846 SDValue A, B, C;
3847
3848 // fold ((A+(B+C))-B) -> A+C
3849 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3850 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3851
3852 // fold ((A+(B-C))-B) -> A-C
3853 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3854 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3855
3856 // fold ((A-(B-C))-C) -> A-B
3857 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3858 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3859
3860 // fold (A-(B-C)) -> A+(C-B)
3861 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3862 return DAG.getNode(ISD::ADD, DL, VT, N0,
3863 DAG.getNode(ISD::SUB, DL, VT, C, B));
3864
3865 // A - (A & B) -> A & (~B)
3866 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3867 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3868 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3869
3870 // fold (A - (-B * C)) -> (A + (B * C))
3871 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3872 return DAG.getNode(ISD::ADD, DL, VT, N0,
3873 DAG.getNode(ISD::MUL, DL, VT, B, C));
3874
3875 // If either operand of a sub is undef, the result is undef
3876 if (N0.isUndef())
3877 return N0;
3878 if (N1.isUndef())
3879 return N1;
3880
3881 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3882 return V;
3883
3884 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3885 return V;
3886
3887 // Try to match AVGCEIL fixedwidth pattern
3888 if (SDValue V = foldSubToAvg(N, DL))
3889 return V;
3890
3891 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3892 return V;
3893
3894 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3895 return V;
3896
3897 // (A - B) - 1 -> add (xor B, -1), A
3898 if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
3899 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3900
3901 // Look for:
3902 // sub y, (xor x, -1)
3903 // And if the target does not like this form then turn into:
3904 // add (add x, y), 1
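// ((xor x, -1) == -x - 1, so sub y, (xor x, -1) == (add (add x, y), 1).)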
3905 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3906 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3907 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3908 }
3909
3910 // Hoist one-use addition by non-opaque constant:
3911 // (x + C) - y -> (x - y) + C
3912 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
3913 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3914 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3915 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3916 }
3917 // y - (x + C) -> (y - x) - C
3918 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
3919 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3920 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3921 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3922 }
3923 // (x - C) - y -> (x - y) - C
3924 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3925 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3926 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3927 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3928 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3929 }
3930 // (C - x) - y -> C - (x + y)
3931 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
3932 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3933 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3934 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3935 }
3936
3937 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3938 // rather than 'sub 0/1' (the sext should get folded).
3939 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3940 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3941 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3942 TLI.getBooleanContents(VT) ==
3943 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3944 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3945 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3946 }
3947
3948 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
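// (Y is 0 when X is non-negative and -1 when X is negative, so (X ^ Y) - Y is
// X in the first case and (~X) + 1 == -X in the second, i.e. abs(X).)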
3949 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3950 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3951 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3952 SDValue S0 = N1.getOperand(0);
3953 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3954 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3955 if (C->getAPIntValue() == (BitWidth - 1))
3956 return DAG.getNode(ISD::ABS, DL, VT, S0);
3957 }
3958 }
3959
3960 // If the relocation model supports it, consider symbol offsets.
3961 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3962 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3963 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3964 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3965 if (GA->getGlobal() == GB->getGlobal())
3966 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3967 DL, VT);
3968 }
3969
3970 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3971 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3972 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3973 if (TN->getVT() == MVT::i1) {
3974 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3975 DAG.getConstant(1, DL, VT));
3976 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3977 }
3978 }
3979
3980 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3981 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
3982 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3983 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3984 }
3985
3986 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3987 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3988 APInt NewStep = -N1.getConstantOperandAPInt(0);
3989 return DAG.getNode(ISD::ADD, DL, VT, N0,
3990 DAG.getStepVector(DL, VT, NewStep));
3991 }
3992
3993 // Prefer an add for more folding potential and possibly better codegen:
3994 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3995 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3996 SDValue ShAmt = N1.getOperand(1);
3997 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3998 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
3999 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4000 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4001 }
4002 }
4003
4004 // As with the previous fold, prefer add for more folding potential.
4005 // Subtracting SMIN/0 is the same as adding SMIN/0:
4006 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4007 if (N1.getOpcode() == ISD::SHL) {
4008 ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4009 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4010 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4011 }
4012
4013 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4014 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4015 N0.getResNo() == 0 && N0.hasOneUse())
4016 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4017 N0.getOperand(0), N1, N0.getOperand(2));
4018
4019 if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4020 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4021 if (SDValue Carry = getAsCarry(TLI, N0)) {
4022 SDValue X = N1;
4023 SDValue Zero = DAG.getConstant(0, DL, VT);
4024 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4025 return DAG.getNode(ISD::UADDO_CARRY, DL,
4026 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4027 Carry);
4028 }
4029 }
4030
4031 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4032 // sub C0, X --> xor X, C0
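// e.g. if X is known to fit in the low 4 bits, (sub 15, X) == (xor X, 15)
// because no bit position can borrow from a higher one.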
4033 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4034 if (!C0->isOpaque()) {
4035 const APInt &C0Val = C0->getAPIntValue();
4036 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4037 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4038 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4039 }
4040 }
4041
4042 // smax(a,b) - smin(a,b) --> abds(a,b)
4043 if (hasOperation(ISD::ABDS, VT) &&
4044 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4045 sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
4046 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4047
4048 // umax(a,b) - umin(a,b) --> abdu(a,b)
4049 if (hasOperation(ISD::ABDU, VT) &&
4050 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4051 sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
4052 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4053
4054 return SDValue();
4055}
4056
4057SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4058 unsigned Opcode = N->getOpcode();
4059 SDValue N0 = N->getOperand(0);
4060 SDValue N1 = N->getOperand(1);
4061 EVT VT = N0.getValueType();
4062 bool IsSigned = Opcode == ISD::SSUBSAT;
4063 SDLoc DL(N);
4064
4065 // fold (sub_sat x, undef) -> 0
4066 if (N0.isUndef() || N1.isUndef())
4067 return DAG.getConstant(0, DL, VT);
4068
4069 // fold (sub_sat x, x) -> 0
4070 if (N0 == N1)
4071 return DAG.getConstant(0, DL, VT);
4072
4073 // fold (sub_sat c1, c2) -> c3
4074 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4075 return C;
4076
4077 // fold vector ops
4078 if (VT.isVector()) {
4079 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4080 return FoldedVOp;
4081
4082 // fold (sub_sat x, 0) -> x, vector edition
4083 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4084 return N0;
4085 }
4086
4087 // fold (sub_sat x, 0) -> x
4088 if (isNullConstant(N1))
4089 return N0;
4090
4091 // If it cannot overflow, transform into a sub.
4092 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4093 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4094
4095 return SDValue();
4096}
4097
4098SDValue DAGCombiner::visitSUBC(SDNode *N) {
4099 SDValue N0 = N->getOperand(0);
4100 SDValue N1 = N->getOperand(1);
4101 EVT VT = N0.getValueType();
4102 SDLoc DL(N);
4103
4104 // If the flag result is dead, turn this into a SUB.
4105 if (!N->hasAnyUseOfValue(1))
4106 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4107 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4108
4109 // fold (subc x, x) -> 0 + no borrow
4110 if (N0 == N1)
4111 return CombineTo(N, DAG.getConstant(0, DL, VT),
4112 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4113
4114 // fold (subc x, 0) -> x + no borrow
4115 if (isNullConstant(N1))
4116 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4117
4118 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4119 if (isAllOnesConstant(N0))
4120 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4121 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4122
4123 return SDValue();
4124}
4125
4126SDValue DAGCombiner::visitSUBO(SDNode *N) {
4127 SDValue N0 = N->getOperand(0);
4128 SDValue N1 = N->getOperand(1);
4129 EVT VT = N0.getValueType();
4130 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4131
4132 EVT CarryVT = N->getValueType(1);
4133 SDLoc DL(N);
4134
4135 // If the flag result is dead, turn this into a SUB.
4136 if (!N->hasAnyUseOfValue(1))
4137 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4138 DAG.getUNDEF(CarryVT));
4139
4140 // fold (subo x, x) -> 0 + no borrow
4141 if (N0 == N1)
4142 return CombineTo(N, DAG.getConstant(0, DL, VT),
4143 DAG.getConstant(0, DL, CarryVT));
4144
4145 // fold (subo x, c) -> (addo x, -c)
4146 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1))
4147 if (IsSigned && !N1C->isMinSignedValue())
4148 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4149 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4150
4151 // fold (subo x, 0) -> x + no borrow
4152 if (isNullOrNullSplat(N1))
4153 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4154
4155 // If it cannot overflow, transform into a sub.
4156 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4157 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4158 DAG.getConstant(0, DL, CarryVT));
4159
4160 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4161 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4162 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4163 DAG.getConstant(0, DL, CarryVT));
4164
4165 return SDValue();
4166}
4167
4168SDValue DAGCombiner::visitSUBE(SDNode *N) {
4169 SDValue N0 = N->getOperand(0);
4170 SDValue N1 = N->getOperand(1);
4171 SDValue CarryIn = N->getOperand(2);
4172
4173 // fold (sube x, y, false) -> (subc x, y)
4174 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4175 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4176
4177 return SDValue();
4178}
4179
4180SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4181 SDValue N0 = N->getOperand(0);
4182 SDValue N1 = N->getOperand(1);
4183 SDValue CarryIn = N->getOperand(2);
4184
4185 // fold (usubo_carry x, y, false) -> (usubo x, y)
4186 if (isNullConstant(CarryIn)) {
4187 if (!LegalOperations ||
4188 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4189 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4190 }
4191
4192 return SDValue();
4193}
4194
4195SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4196 SDValue N0 = N->getOperand(0);
4197 SDValue N1 = N->getOperand(1);
4198 SDValue CarryIn = N->getOperand(2);
4199
4200 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4201 if (isNullConstant(CarryIn)) {
4202 if (!LegalOperations ||
4203 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4204 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4205 }
4206
4207 return SDValue();
4208}
4209
4210// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4211// UMULFIXSAT here.
4212SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4213 SDValue N0 = N->getOperand(0);
4214 SDValue N1 = N->getOperand(1);
4215 SDValue Scale = N->getOperand(2);
4216 EVT VT = N0.getValueType();
4217
4218 // fold (mulfix x, undef, scale) -> 0
4219 if (N0.isUndef() || N1.isUndef())
4220 return DAG.getConstant(0, SDLoc(N), VT);
4221
4222 // Canonicalize constant to RHS (vector doesn't have to splat)
4223 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4224 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4225 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4226
4227 // fold (mulfix x, 0, scale) -> 0
4228 if (isNullConstant(N1))
4229 return DAG.getConstant(0, SDLoc(N), VT);
4230
4231 return SDValue();
4232}
4233
4234SDValue DAGCombiner::visitMUL(SDNode *N) {
4235 SDValue N0 = N->getOperand(0);
4236 SDValue N1 = N->getOperand(1);
4237 EVT VT = N0.getValueType();
4238 SDLoc DL(N);
4239
4240 // fold (mul x, undef) -> 0
4241 if (N0.isUndef() || N1.isUndef())
4242 return DAG.getConstant(0, DL, VT);
4243
4244 // fold (mul c1, c2) -> c1*c2
4245 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4246 return C;
4247
4248 // canonicalize constant to RHS (vector doesn't have to splat)
4249 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4250 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4251 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4252
4253 bool N1IsConst = false;
4254 bool N1IsOpaqueConst = false;
4255 APInt ConstValue1;
4256
4257 // fold vector ops
4258 if (VT.isVector()) {
4259 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4260 return FoldedVOp;
4261
4262 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4263 assert((!N1IsConst ||
4264 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4265 "Splat APInt should be element width");
4266 } else {
4267 N1IsConst = isa<ConstantSDNode>(N1);
4268 if (N1IsConst) {
4269 ConstValue1 = N1->getAsAPIntVal();
4270 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4271 }
4272 }
4273
4274 // fold (mul x, 0) -> 0
4275 if (N1IsConst && ConstValue1.isZero())
4276 return N1;
4277
4278 // fold (mul x, 1) -> x
4279 if (N1IsConst && ConstValue1.isOne())
4280 return N0;
4281
4282 if (SDValue NewSel = foldBinOpIntoSelect(N))
4283 return NewSel;
4284
4285 // fold (mul x, -1) -> 0-x
4286 if (N1IsConst && ConstValue1.isAllOnes())
4287 return DAG.getNegative(N0, DL, VT);
4288
4289 // fold (mul x, (1 << c)) -> x << c
4290 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4291 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4292 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4293 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4294 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4295 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4296 }
4297 }
4298
4299 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4300 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4301 unsigned Log2Val = (-ConstValue1).logBase2();
4302 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4303
4304 // FIXME: If the input is something that is easily negated (e.g. a
4305 // single-use add), we should put the negate there.
4306 return DAG.getNode(ISD::SUB, DL, VT,
4307 DAG.getConstant(0, DL, VT),
4308 DAG.getNode(ISD::SHL, DL, VT, N0,
4309 DAG.getConstant(Log2Val, DL, ShiftVT)));
4310 }
4311
4312 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if its
4313 // hi result is in use, in case we hit this mid-legalization.
4314 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4315 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4316 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4317 // TODO: Can we match commutable operands with getNodeIfExists?
4318 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4319 if (LoHi->hasAnyUseOfValue(1))
4320 return SDValue(LoHi, 0);
4321 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4322 if (LoHi->hasAnyUseOfValue(1))
4323 return SDValue(LoHi, 0);
4324 }
4325 }
4326
4327 // Try to transform:
4328 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4329 // mul x, (2^N + 1) --> add (shl x, N), x
4330 // mul x, (2^N - 1) --> sub (shl x, N), x
4331 // Examples: x * 33 --> (x << 5) + x
4332 // x * 15 --> (x << 4) - x
4333 // x * -33 --> -((x << 5) + x)
4334 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4335 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4336 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4337 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4338 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4339 // x * 0xf800 --> (x << 16) - (x << 11)
4340 // x * -0x8800 --> -((x << 15) + (x << 11))
4341 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4342 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4343 // TODO: We could handle more general decomposition of any constant by
4344 // having the target set a limit on number of ops and making a
4345 // callback to determine that sequence (similar to sqrt expansion).
4346 unsigned MathOp = ISD::DELETED_NODE;
4347 APInt MulC = ConstValue1.abs();
4348 // The constant `2` should be treated as (2^0 + 1).
4349 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4350 MulC.lshrInPlace(TZeros);
4351 if ((MulC - 1).isPowerOf2())
4352 MathOp = ISD::ADD;
4353 else if ((MulC + 1).isPowerOf2())
4354 MathOp = ISD::SUB;
4355
4356 if (MathOp != ISD::DELETED_NODE) {
4357 unsigned ShAmt =
4358 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4359 ShAmt += TZeros;
4360 assert(ShAmt < VT.getScalarSizeInBits() &&
4361 "multiply-by-constant generated out of bounds shift");
4362 SDValue Shl =
4363 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4364 SDValue R =
4365 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4366 DAG.getNode(ISD::SHL, DL, VT, N0,
4367 DAG.getConstant(TZeros, DL, VT)))
4368 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4369 if (ConstValue1.isNegative())
4370 R = DAG.getNegative(R, DL, VT);
4371 return R;
4372 }
4373 }
4374
4375 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
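// e.g. (mul (shl X, 2), 5) --> (mul X, 20)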
4376 if (N0.getOpcode() == ISD::SHL) {
4377 SDValue N01 = N0.getOperand(1);
4378 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4379 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4380 }
4381
4382 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4383 // use.
4384 {
4385 SDValue Sh, Y;
4386
4387 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4388 if (N0.getOpcode() == ISD::SHL &&
4390 Sh = N0; Y = N1;
4391 } else if (N1.getOpcode() == ISD::SHL &&
4393 N1->hasOneUse()) {
4394 Sh = N1; Y = N0;
4395 }
4396
4397 if (Sh.getNode()) {
4398 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4399 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4400 }
4401 }
4402
4403 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4404 if (N0.getOpcode() == ISD::ADD &&
4408 return DAG.getNode(
4409 ISD::ADD, DL, VT,
4410 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4411 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4412
4413 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4414 ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4415 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4416 const APInt &C0 = N0.getConstantOperandAPInt(0);
4417 const APInt &C1 = NC1->getAPIntValue();
4418 return DAG.getVScale(DL, VT, C0 * C1);
4419 }
4420
4421 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4422 APInt MulVal;
4423 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4424 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4425 const APInt &C0 = N0.getConstantOperandAPInt(0);
4426 APInt NewStep = C0 * MulVal;
4427 return DAG.getStepVector(DL, VT, NewStep);
4428 }
4429
4430 // Fold (mul x, 0/undef) -> 0 and
4431 // (mul x, 1) -> x
4432 // into and(x, mask).
4433 // We can replace vectors with '0' and '1' factors with a clearing mask.
4434 if (VT.isFixedLengthVector()) {
4435 unsigned NumElts = VT.getVectorNumElements();
4436 SmallBitVector ClearMask;
4437 ClearMask.reserve(NumElts);
4438 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4439 if (!V || V->isZero()) {
4440 ClearMask.push_back(true);
4441 return true;
4442 }
4443 ClearMask.push_back(false);
4444 return V->isOne();
4445 };
4446 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4447 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4448 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4449 EVT LegalSVT = N1.getOperand(0).getValueType();
4450 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4451 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4452 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4453 for (unsigned I = 0; I != NumElts; ++I)
4454 if (ClearMask[I])
4455 Mask[I] = Zero;
4456 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4457 }
4458 }
4459
4460 // reassociate mul
4461 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4462 return RMUL;
4463
4464 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4465 if (SDValue SD =
4466 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4467 return SD;
4468
4469 // Simplify the operands using demanded-bits information.
4470 if (SimplifyDemandedBits(SDValue(N, 0)))
4471 return SDValue(N, 0);
4472
4473 return SDValue();
4474}
4475
4476/// Return true if divmod libcall is available.
4477static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4478 const TargetLowering &TLI) {
4479 RTLIB::Libcall LC;
4480 EVT NodeType = Node->getValueType(0);
4481 if (!NodeType.isSimple())
4482 return false;
4483 switch (NodeType.getSimpleVT().SimpleTy) {
4484 default: return false; // No libcall for vector types.
4485 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4486 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4487 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4488 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4489 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4490 }
4491
4492 return TLI.getLibcallName(LC) != nullptr;
4493}
4494
4495/// Issue divrem if both quotient and remainder are needed.
4496SDValue DAGCombiner::useDivRem(SDNode *Node) {
4497 if (Node->use_empty())
4498 return SDValue(); // This is a dead node, leave it alone.
4499
4500 unsigned Opcode = Node->getOpcode();
4501 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4502 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4503
4504 // DivMod lib calls can still work on non-legal types if using lib-calls.
4505 EVT VT = Node->getValueType(0);
4506 if (VT.isVector() || !VT.isInteger())
4507 return SDValue();
4508
4509 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4510 return SDValue();
4511
4512 // If DIVREM is going to get expanded into a libcall,
4513 // but there is no libcall available, then don't combine.
4514 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4515 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4516 return SDValue();
4517
4518 // If div is legal, it's better to do the normal expansion
4519 unsigned OtherOpcode = 0;
4520 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4521 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4522 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4523 return SDValue();
4524 } else {
4525 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4526 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4527 return SDValue();
4528 }
4529
4530 SDValue Op0 = Node->getOperand(0);
4531 SDValue Op1 = Node->getOperand(1);
4532 SDValue combined;
4533 for (SDNode *User : Op0->uses()) {
4534 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4535 User->use_empty())
4536 continue;
4537 // Convert the other matching node(s), too;
4538 // otherwise, the DIVREM may get target-legalized into something
4539 // target-specific that we won't be able to recognize.
4540 unsigned UserOpc = User->getOpcode();
4541 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4542 User->getOperand(0) == Op0 &&
4543 User->getOperand(1) == Op1) {
4544 if (!combined) {
4545 if (UserOpc == OtherOpcode) {
4546 SDVTList VTs = DAG.getVTList(VT, VT);
4547 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4548 } else if (UserOpc == DivRemOpc) {
4549 combined = SDValue(User, 0);
4550 } else {
4551 assert(UserOpc == Opcode);
4552 continue;
4553 }
4554 }
4555 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4556 CombineTo(User, combined);
4557 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4558 CombineTo(User, combined.getValue(1));
4559 }
4560 }
4561 return combined;
4562}
4563
4564static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4565 SDValue N0 = N->getOperand(0);
4566 SDValue N1 = N->getOperand(1);
4567 EVT VT = N->getValueType(0);
4568 SDLoc DL(N);
4569
4570 unsigned Opc = N->getOpcode();
4571 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4573
4574 // X / undef -> undef
4575 // X % undef -> undef
4576 // X / 0 -> undef
4577 // X % 0 -> undef
4578 // NOTE: This includes vectors where any divisor element is zero/undef.
4579 if (DAG.isUndef(Opc, {N0, N1}))
4580 return DAG.getUNDEF(VT);
4581
4582 // undef / X -> 0
4583 // undef % X -> 0
4584 if (N0.isUndef())
4585 return DAG.getConstant(0, DL, VT);
4586
4587 // 0 / X -> 0
4588 // 0 % X -> 0
4589 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4590 if (N0C && N0C->isZero())
4591 return N0;
4592
4593 // X / X -> 1
4594 // X % X -> 0
4595 if (N0 == N1)
4596 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4597
4598 // X / 1 -> X
4599 // X % 1 -> 0
4600 // If this is a boolean op (single-bit element type), we can't have
4601 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4602 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4603 // it's a 1.
4604 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4605 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4606
4607 return SDValue();
4608}
4609
4610SDValue DAGCombiner::visitSDIV(SDNode *N) {
4611 SDValue N0 = N->getOperand(0);
4612 SDValue N1 = N->getOperand(1);
4613 EVT VT = N->getValueType(0);
4614 EVT CCVT = getSetCCResultType(VT);
4615 SDLoc DL(N);
4616
4617 // fold (sdiv c1, c2) -> c1/c2
4618 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4619 return C;
4620
4621 // fold vector ops
4622 if (VT.isVector())
4623 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4624 return FoldedVOp;
4625
4626 // fold (sdiv X, -1) -> 0-X
4627 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4628 if (N1C && N1C->isAllOnes())
4629 return DAG.getNegative(N0, DL, VT);
4630
4631 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4632 if (N1C && N1C->isMinSignedValue())
4633 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4634 DAG.getConstant(1, DL, VT),
4635 DAG.getConstant(0, DL, VT));
4636
4637 if (SDValue V = simplifyDivRem(N, DAG))
4638 return V;
4639
4640 if (SDValue NewSel = foldBinOpIntoSelect(N))
4641 return NewSel;
4642
4643 // If we know the sign bits of both operands are zero, strength reduce to a
4644 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4645 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4646 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4647
4648 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4649 // If the corresponding remainder node exists, update its users with
4650 // (Dividend - (Quotient * Divisor)).
4651 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4652 { N0, N1 })) {
4653 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4654 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4655 AddToWorklist(Mul.getNode());
4656 AddToWorklist(Sub.getNode());
4657 CombineTo(RemNode, Sub);
4658 }
4659 return V;
4660 }
4661
4662 // sdiv, srem -> sdivrem
4663 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4664 // true. Otherwise, we break the simplification logic in visitREM().
4665 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4666 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4667 if (SDValue DivRem = useDivRem(N))
4668 return DivRem;
4669
4670 return SDValue();
4671}
4672
4673static bool isDivisorPowerOfTwo(SDValue Divisor) {
4674 // Helper for determining whether a value is a power-of-2 constant scalar or
4675 // a vector of such elements.
4676 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4677 if (C->isZero() || C->isOpaque())
4678 return false;
4679 if (C->getAPIntValue().isPowerOf2())
4680 return true;
4681 if (C->getAPIntValue().isNegatedPowerOf2())
4682 return true;
4683 return false;
4684 };
4685
4686 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4687}
4688
4689SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4690 SDLoc DL(N);
4691 EVT VT = N->getValueType(0);
4692 EVT CCVT = getSetCCResultType(VT);
4693 unsigned BitWidth = VT.getScalarSizeInBits();
4694
4695 // fold (sdiv X, pow2) -> simple ops after legalize
4696 // FIXME: We check for the exact bit here because the generic lowering gives
4697 // better results in that case. The target-specific lowering should learn how
4698 // to handle exact sdivs efficiently.
4699 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4700 // Target-specific implementation of sdiv x, pow2.
4701 if (SDValue Res = BuildSDIVPow2(N))
4702 return Res;
4703
4704 // Create constants that are functions of the shift amount value.
4705 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4706 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4707 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4708 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4709 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4710 if (!isConstantOrConstantVector(Inexact))
4711 return SDValue();
4712
4713 // Splat the sign bit into the register
4714 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4715 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4716 AddToWorklist(Sign.getNode());
4717
4718 // Add (N0 < 0) ? abs2 - 1 : 0;
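// e.g. for (sdiv -7, 4): (sra -7, 2) alone would give -2, but adding
// abs(4) - 1 == 3 first gives (-7 + 3) >> 2 == -1, the round-toward-zero
// result.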
4719 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4720 AddToWorklist(Srl.getNode());
4721 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4722 AddToWorklist(Add.getNode());
4723 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4724 AddToWorklist(Sra.getNode());
4725
4726 // Special case: (sdiv X, 1) -> X
4727 // Special Case: (sdiv X, -1) -> 0-X
4728 SDValue One = DAG.getConstant(1, DL, VT);
4729 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4730 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4731 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4732 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4733 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4734
4735 // If dividing by a positive value, we're done. Otherwise, the result must
4736 // be negated.
4737 SDValue Zero = DAG.getConstant(0, DL, VT);
4738 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4739
4740 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4741 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4742 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4743 return Res;
4744 }
4745
4746 // If integer divide is expensive and we satisfy the requirements, emit an
4747 // alternate sequence. Targets may check function attributes for size/speed
4748 // trade-offs.
4751 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4752 if (SDValue Op = BuildSDIV(N))
4753 return Op;
4754
4755 return SDValue();
4756}
4757
4758SDValue DAGCombiner::visitUDIV(SDNode *N) {
4759 SDValue N0 = N->getOperand(0);
4760 SDValue N1 = N->getOperand(1);
4761 EVT VT = N->getValueType(0);
4762 EVT CCVT = getSetCCResultType(VT);
4763 SDLoc DL(N);
4764
4765 // fold (udiv c1, c2) -> c1/c2
4766 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4767 return C;
4768
4769 // fold vector ops
4770 if (VT.isVector())
4771 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4772 return FoldedVOp;
4773
4774 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4775 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4776 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4777 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4778 DAG.getConstant(1, DL, VT),
4779 DAG.getConstant(0, DL, VT));
4780 }
4781
4782 if (SDValue V = simplifyDivRem(N, DAG))
4783 return V;
4784
4785 if (SDValue NewSel = foldBinOpIntoSelect(N))
4786 return NewSel;
4787
4788 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4789 // If the corresponding remainder node exists, update its users with
4790 // (Dividend - (Quotient * Divisor)).
4791 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4792 { N0, N1 })) {
4793 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4794 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4795 AddToWorklist(Mul.getNode());
4796 AddToWorklist(Sub.getNode());
4797 CombineTo(RemNode, Sub);
4798 }
4799 return V;
4800 }
4801
4802 // udiv, urem -> udivrem
4803 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4804 // true. Otherwise, we break the simplification logic in visitREM().
4805 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4806 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4807 if (SDValue DivRem = useDivRem(N))
4808 return DivRem;
4809
4810 return SDValue();
4811}
4812
4813SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4814 SDLoc DL(N);
4815 EVT VT = N->getValueType(0);
4816
4817 // fold (udiv x, (1 << c)) -> x >>u c
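// e.g. (udiv x, 16) --> (srl x, 4)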
4818 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4819 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4820 AddToWorklist(LogBase2.getNode());
4821
4822 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4823 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4824 AddToWorklist(Trunc.getNode());
4825 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4826 }
4827 }
4828
4829 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
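// e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))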
4830 if (N1.getOpcode() == ISD::SHL) {
4831 SDValue N10 = N1.getOperand(0);
4832 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4833 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4834 AddToWorklist(LogBase2.getNode());
4835
4836 EVT ADDVT = N1.getOperand(1).getValueType();
4837 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4838 AddToWorklist(Trunc.getNode());
4839 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4840 AddToWorklist(Add.getNode());
4841 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4842 }
4843 }
4844 }
4845
4846 // fold (udiv x, c) -> alternate
4847 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4848 if (isConstantOrConstantVector(N1) &&
4849 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4850 if (SDValue Op = BuildUDIV(N))
4851 return Op;
4852
4853 return SDValue();
4854}
4855
4856SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4857 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4858 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4859 // Target-specific implementation of srem x, pow2.
4860 if (SDValue Res = BuildSREMPow2(N))
4861 return Res;
4862 }
4863 return SDValue();
4864}
4865
4866// handles ISD::SREM and ISD::UREM
4867SDValue DAGCombiner::visitREM(SDNode *N) {
4868 unsigned Opcode = N->getOpcode();
4869 SDValue N0 = N->getOperand(0);
4870 SDValue N1 = N->getOperand(1);
4871 EVT VT = N->getValueType(0);
4872 EVT CCVT = getSetCCResultType(VT);
4873
4874 bool isSigned = (Opcode == ISD::SREM);
4875 SDLoc DL(N);
4876
4877 // fold (rem c1, c2) -> c1%c2
4878 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4879 return C;
4880
4881 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4882 // Freeze the numerator to avoid a miscompile with an undefined value.
4883 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4884 CCVT.isVector() == VT.isVector()) {
4885 SDValue F0 = DAG.getFreeze(N0);
4886 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4887 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4888 }
4889
4890 if (SDValue V = simplifyDivRem(N, DAG))
4891 return V;
4892
4893 if (SDValue NewSel = foldBinOpIntoSelect(N))
4894 return NewSel;
4895
4896 if (isSigned) {
4897 // If we know the sign bits of both operands are zero, strength reduce to a
4898 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4899 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4900 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4901 } else {
4902 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4903 // fold (urem x, pow2) -> (and x, pow2-1)
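// e.g. (urem x, 8) --> (and x, 7)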
4904 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4905 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4906 AddToWorklist(Add.getNode());
4907 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4908 }
4909 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4910 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
4911 // TODO: We should sink the following into isKnownToBePowerOfTwo
4912 // using an OrZero parameter analogous to our handling in ValueTracking.
4913 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
4914 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4915 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4916 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4917 AddToWorklist(Add.getNode());
4918 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4919 }
4920 }
4921
4922 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4923
4924 // If X/C can be simplified by the division-by-constant logic, lower
4925 // X%C to the equivalent of X-X/C*C.
4926 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4927 // speculative DIV must not cause a DIVREM conversion. We guard against this
4928 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4929 // combine will not return a DIVREM. Regardless, checking cheapness here
4930 // makes sense since the simplification results in fatter code.
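// For example, (srem X, 7) is rewritten as X - (sdiv X, 7) * 7, where the
// sdiv is in turn expected to lower to a multiply-by-magic-constant sequence
// (see BuildSDIV) rather than a hardware divide.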
4931 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4932 if (isSigned) {
4933 // check if we can build faster implementation for srem
4934 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4935 return OptimizedRem;
4936 }
4937
4938 SDValue OptimizedDiv =
4939 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4940 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4941 // If the equivalent Div node also exists, update its users.
4942 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4943 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4944 { N0, N1 }))
4945 CombineTo(DivNode, OptimizedDiv);
4946 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4947 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4948 AddToWorklist(OptimizedDiv.getNode());
4949 AddToWorklist(Mul.getNode());
4950 return Sub;
4951 }
4952 }
4953
4954 // sdiv, srem -> sdivrem
4955 if (SDValue DivRem = useDivRem(N))
4956 return DivRem.getValue(1);
4957
4958 return SDValue();
4959}
4960
4961SDValue DAGCombiner::visitMULHS(SDNode *N) {
4962 SDValue N0 = N->getOperand(0);
4963 SDValue N1 = N->getOperand(1);
4964 EVT VT = N->getValueType(0);
4965 SDLoc DL(N);
4966
4967 // fold (mulhs c1, c2)
4968 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4969 return C;
4970
4971 // canonicalize constant to RHS.
4972 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4973 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4974 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4975
4976 if (VT.isVector()) {
4977 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4978 return FoldedVOp;
4979
4980 // fold (mulhs x, 0) -> 0
4981 // do not return N1, because undef node may exist.
4982 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4983 return DAG.getConstant(0, DL, VT);
4984 }
4985
4986 // fold (mulhs x, 0) -> 0
4987 if (isNullConstant(N1))
4988 return N1;
4989
4990 // fold (mulhs x, 1) -> (sra x, size(x)-1)
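// (the high half of sext(x) * 1 is all copies of x's sign bit)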
4991 if (isOneConstant(N1))
4992 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4993 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4994 getShiftAmountTy(N0.getValueType())));
4995
4996 // fold (mulhs x, undef) -> 0
4997 if (N0.isUndef() || N1.isUndef())
4998 return DAG.getConstant(0, DL, VT);
4999
5000 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5001 // plus a shift.
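// e.g. for i16: (mulhs a, b) --> (trunc (srl (mul (sext a), (sext b)), 16))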
5002 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5003 !VT.isVector()) {
5004 MVT Simple = VT.getSimpleVT();
5005 unsigned SimpleSize = Simple.getSizeInBits();
5006 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5007 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5008 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5009 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5010 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5011 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5012 DAG.getConstant(SimpleSize, DL,
5013 getShiftAmountTy(N1.getValueType())));
5014 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5015 }
5016 }
5017
5018 return SDValue();
5019}
5020
5021SDValue DAGCombiner::visitMULHU(SDNode *N) {
5022 SDValue N0 = N->getOperand(0);
5023 SDValue N1 = N->getOperand(1);
5024 EVT VT = N->getValueType(0);
5025 SDLoc DL(N);
5026
5027 // fold (mulhu c1, c2)
5028 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5029 return C;
5030
5031 // canonicalize constant to RHS.
5032 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5033 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5034 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5035
5036 if (VT.isVector()) {
5037 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5038 return FoldedVOp;
5039
5040 // fold (mulhu x, 0) -> 0
5041 // do not return N1, because undef node may exist.
5042 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5043 return DAG.getConstant(0, DL, VT);
5044 }
5045
5046 // fold (mulhu x, 0) -> 0
5047 if (isNullConstant(N1))
5048 return N1;
5049
5050 // fold (mulhu x, 1) -> 0
5051 if (isOneConstant(N1))
5052 return DAG.getConstant(0, DL, N0.getValueType());
5053
5054 // fold (mulhu x, undef) -> 0
5055 if (N0.isUndef() || N1.isUndef())
5056 return DAG.getConstant(0, DL, VT);
5057
5058 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
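// e.g. for i32: (mulhu x, 16) --> (srl x, 28)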
5059 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5060 hasOperation(ISD::SRL, VT)) {
5061 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5062 unsigned NumEltBits = VT.getScalarSizeInBits();
5063 SDValue SRLAmt = DAG.getNode(
5064 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5065 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5066 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5067 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5068 }
5069 }
5070
5071 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5072 // plus a shift.
5073 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5074 !VT.isVector()) {
5075 MVT Simple = VT.getSimpleVT();
5076 unsigned SimpleSize = Simple.getSizeInBits();
5077 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5078 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5079 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5080 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5081 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5082 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5083 DAG.getConstant(SimpleSize, DL,
5084 getShiftAmountTy(N1.getValueType())));
5085 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5086 }
5087 }
5088
5089 // Simplify the operands using demanded-bits information.
5090 // We don't have demanded bits support for MULHU so this just enables constant
5091 // folding based on known bits.
5092 if (SimplifyDemandedBits(SDValue(N, 0)))
5093 return SDValue(N, 0);
5094
5095 return SDValue();
5096}
5097
5098SDValue DAGCombiner::visitAVG(SDNode *N) {
5099 unsigned Opcode = N->getOpcode();
5100 SDValue N0 = N->getOperand(0);
5101 SDValue N1 = N->getOperand(1);
5102 EVT VT = N->getValueType(0);
5103 SDLoc DL(N);
5104
5105 // fold (avg c1, c2)
5106 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5107 return C;
5108
5109 // canonicalize constant to RHS.
5110 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5111 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5112 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5113
5114 if (VT.isVector()) {
5115 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5116 return FoldedVOp;
5117
5118 // fold (avgfloor x, 0) -> x >> 1
5119 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5120 if (Opcode == ISD::AVGFLOORS)
5121 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5122 if (Opcode == ISD::AVGFLOORU)
5123 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5124 }
5125 }
5126
5127 // fold (avg x, undef) -> x
5128 if (N0.isUndef())
5129 return N1;
5130 if (N1.isUndef())
5131 return N0;
5132
5133 // Fold (avg x, x) --> x
5134 if (N0 == N1 && Level >= AfterLegalizeTypes)
5135 return N0;
5136
5137 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5138
5139 return SDValue();
5140}
5141
5142SDValue DAGCombiner::visitABD(SDNode *N) {
5143 unsigned Opcode = N->getOpcode();
5144 SDValue N0 = N->getOperand(0);
5145 SDValue N1 = N->getOperand(1);
5146 EVT VT = N->getValueType(0);
5147 SDLoc DL(N);
5148
5149 // fold (abd c1, c2)
5150 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5151 return C;
5152
5153 // canonicalize constant to RHS.
5154 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5155 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5156 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5157
5158 if (VT.isVector()) {
5159 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5160 return FoldedVOp;
5161
5162 // fold (abds x, 0) -> abs x
5163 // fold (abdu x, 0) -> x
5164 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5165 if (Opcode == ISD::ABDS)
5166 return DAG.getNode(ISD::ABS, DL, VT, N0);
5167 if (Opcode == ISD::ABDU)
5168 return N0;
5169 }
5170 }
5171
5172 // fold (abd x, undef) -> 0
5173 if (N0.isUndef() || N1.isUndef())
5174 return DAG.getConstant(0, DL, VT);
5175
5176 // fold (abds x, y) -> (abdu x, y) iff both args are known positive
5177 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5178 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5179 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5180
5181 return SDValue();
5182}
5183
5184/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5185/// give the opcodes for the two computations that are being performed. Return
5186 /// the combined value if a simplification was made.
5187SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5188 unsigned HiOp) {
5189 // If the high half is not needed, just compute the low half.
5190 bool HiExists = N->hasAnyUseOfValue(1);
5191 if (!HiExists && (!LegalOperations ||
5192 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5193 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5194 return CombineTo(N, Res, Res);
5195 }
5196
5197 // If the low half is not needed, just compute the high half.
5198 bool LoExists = N->hasAnyUseOfValue(0);
5199 if (!LoExists && (!LegalOperations ||
5200 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5201 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5202 return CombineTo(N, Res, Res);
5203 }
5204
5205 // If both halves are used, return as it is.
5206 if (LoExists && HiExists)
5207 return SDValue();
5208
5209 // If the two computed results can be simplified separately, separate them.
5210 if (LoExists) {
5211 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5212 AddToWorklist(Lo.getNode());
5213 SDValue LoOpt = combine(Lo.getNode());
5214 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5215 (!LegalOperations ||
5216 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5217 return CombineTo(N, LoOpt, LoOpt);
5218 }
5219
5220 if (HiExists) {
5221 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5222 AddToWorklist(Hi.getNode());
5223 SDValue HiOpt = combine(Hi.getNode());
5224 if (HiOpt.getNode() && HiOpt != Hi &&
5225 (!LegalOperations ||
5226 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5227 return CombineTo(N, HiOpt, HiOpt);
5228 }
5229
5230 return SDValue();
5231}
5232
5233SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5234 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5235 return Res;
5236
5237 SDValue N0 = N->getOperand(0);
5238 SDValue N1 = N->getOperand(1);
5239 EVT VT = N->getValueType(0);
5240 SDLoc DL(N);
5241
5242 // Constant fold.
5243 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5244 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5245
5246 // canonicalize constant to RHS (vector doesn't have to splat)
5247 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5248 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5249 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5250
5251 // If the type twice as wide is legal, transform this into a wider
5252 // multiply plus a shift.
5253 if (VT.isSimple() && !VT.isVector()) {
5254 MVT Simple = VT.getSimpleVT();
5255 unsigned SimpleSize = Simple.getSizeInBits();
5256 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5257 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5258 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5259 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5260 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5261 // Compute the high part as N1.
5262 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5263 DAG.getConstant(SimpleSize, DL,
5264 getShiftAmountTy(Lo.getValueType())));
5265 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5266 // Compute the low part as N0.
5267 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5268 return CombineTo(N, Lo, Hi);
5269 }
5270 }
5271
5272 return SDValue();
5273}
5274
5275SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5276 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5277 return Res;
5278
5279 SDValue N0 = N->getOperand(0);
5280 SDValue N1 = N->getOperand(1);
5281 EVT VT = N->getValueType(0);
5282 SDLoc DL(N);
5283
5284 // Constant fold.
5285 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5286 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5287
5288 // canonicalize constant to RHS (vector doesn't have to splat)
5289 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5290 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5291 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5292
5293 // (umul_lohi N0, 0) -> (0, 0)
5294 if (isNullConstant(N1)) {
5295 SDValue Zero = DAG.getConstant(0, DL, VT);
5296 return CombineTo(N, Zero, Zero);
5297 }
5298
5299 // (umul_lohi N0, 1) -> (N0, 0)
5300 if (isOneConstant(N1)) {
5301 SDValue Zero = DAG.getConstant(0, DL, VT);
5302 return CombineTo(N, N0, Zero);
5303 }
5304
5305 // If the type twice as wide is legal, transform this into a wider
5306 // multiply plus a shift.
5307 if (VT.isSimple() && !VT.isVector()) {
5308 MVT Simple = VT.getSimpleVT();
5309 unsigned SimpleSize = Simple.getSizeInBits();
5310 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5311 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5312 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5313 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5314 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5315 // Compute the high part as N1.
5316 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5317 DAG.getConstant(SimpleSize, DL,
5318 getShiftAmountTy(Lo.getValueType())));
5319 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5320 // Compute the low part as N0.
5321 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5322 return CombineTo(N, Lo, Hi);
5323 }
5324 }
5325
5326 return SDValue();
5327}
5328
5329SDValue DAGCombiner::visitMULO(SDNode *N) {
5330 SDValue N0 = N->getOperand(0);
5331 SDValue N1 = N->getOperand(1);
5332 EVT VT = N0.getValueType();
5333 bool IsSigned = (ISD::SMULO == N->getOpcode());
5334
5335 EVT CarryVT = N->getValueType(1);
5336 SDLoc DL(N);
5337
5338 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5339 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5340
5341 // fold operation with constant operands.
5342 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5343 // multiple results.
5344 if (N0C && N1C) {
5345 bool Overflow;
5346 APInt Result =
5347 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5348 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5349 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5350 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5351 }
5352
5353 // canonicalize constant to RHS.
5354 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5355 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5356 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5357
5358 // fold (mulo x, 0) -> 0 + no carry out
5359 if (isNullOrNullSplat(N1))
5360 return CombineTo(N, DAG.getConstant(0, DL, VT),
5361 DAG.getConstant(0, DL, CarryVT));
5362
5363 // (mulo x, 2) -> (addo x, x)
5364 // FIXME: This needs a freeze.
5365 if (N1C && N1C->getAPIntValue() == 2 &&
5366 (!IsSigned || VT.getScalarSizeInBits() > 2))
5367 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5368 N->getVTList(), N0, N0);
5369
5370 // A 1 bit SMULO overflows if both inputs are 1.
5371 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5372 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5373 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5374 DAG.getConstant(0, DL, VT), ISD::SETNE);
5375 return CombineTo(N, And, Cmp);
5376 }
5377
5378 // If it cannot overflow, transform into a mul.
5379 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5380 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5381 DAG.getConstant(0, DL, CarryVT));
5382 return SDValue();
5383}
5384
5385// Function to calculate whether the Min/Max pair of SDNodes (potentially
5386// swapped around) make a signed saturate pattern, clamping to between a signed
5387// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5388// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5389// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5390// same as SimplifySelectCC. N0<N1 ? N2 : N3.
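// For example, smin(smax(X, -128), 127) clamps X to the signed i8 range and
// returns X with BW = 8 and Unsigned = false, while smin(smax(X, 0), 255)
// returns X with BW = 8 and Unsigned = true.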
5391 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5392 SDValue N3, ISD::CondCode CC, unsigned &BW,
5393 bool &Unsigned, SelectionDAG &DAG) {
5394 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5395 ISD::CondCode CC) {
5396 // The compare and select operand should be the same or the select operands
5397 // should be truncated versions of the comparison.
5398 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5399 return 0;
5400 // The constants need to be the same or a truncated version of each other.
5401 ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5402 ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5403 if (!N1C || !N3C)
5404 return 0;
5405 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5406 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5407 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5408 return 0;
5409 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5410 };
5411
5412 // Check the initial value is a SMIN/SMAX equivalent.
5413 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5414 if (!Opcode0)
5415 return SDValue();
5416
5417 // We could only need one range check, if the fptosi could never produce
5418 // the upper value.
5419 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5420 if (isNullOrNullSplat(N3)) {
5421 EVT IntVT = N0.getValueType().getScalarType();
5422 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5423 if (FPVT.isSimple()) {
5424 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5425 const fltSemantics &Semantics = InputTy->getFltSemantics();
5426 uint32_t MinBitWidth =
5427 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5428 if (IntVT.getSizeInBits() >= MinBitWidth) {
5429 Unsigned = true;
5430 BW = PowerOf2Ceil(MinBitWidth);
5431 return N0;
5432 }
5433 }
5434 }
5435 }
5436
5437 SDValue N00, N01, N02, N03;
5438 ISD::CondCode N0CC;
5439 switch (N0.getOpcode()) {
5440 case ISD::SMIN:
5441 case ISD::SMAX:
5442 N00 = N02 = N0.getOperand(0);
5443 N01 = N03 = N0.getOperand(1);
5444 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5445 break;
5446 case ISD::SELECT_CC:
5447 N00 = N0.getOperand(0);
5448 N01 = N0.getOperand(1);
5449 N02 = N0.getOperand(2);
5450 N03 = N0.getOperand(3);
5451 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5452 break;
5453 case ISD::SELECT:
5454 case ISD::VSELECT:
5455 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5456 return SDValue();
5457 N00 = N0.getOperand(0).getOperand(0);
5458 N01 = N0.getOperand(0).getOperand(1);
5459 N02 = N0.getOperand(1);
5460 N03 = N0.getOperand(2);
5461 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5462 break;
5463 default:
5464 return SDValue();
5465 }
5466
5467 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5468 if (!Opcode1 || Opcode0 == Opcode1)
5469 return SDValue();
5470
5471 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5472 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5473 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5474 return SDValue();
5475
5476 const APInt &MinC = MinCOp->getAPIntValue();
5477 const APInt &MaxC = MaxCOp->getAPIntValue();
5478 APInt MinCPlus1 = MinC + 1;
5479 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5480 BW = MinCPlus1.exactLogBase2() + 1;
5481 Unsigned = false;
5482 return N02;
5483 }
5484
5485 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5486 BW = MinCPlus1.exactLogBase2();
5487 Unsigned = true;
5488 return N02;
5489 }
5490
5491 return SDValue();
5492}
5493
5494 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5495 SDValue N3, ISD::CondCode CC,
5496 SelectionDAG &DAG) {
5497 unsigned BW;
5498 bool Unsigned;
5499 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5500 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5501 return SDValue();
5502 EVT FPVT = Fp.getOperand(0).getValueType();
5503 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5504 if (FPVT.isVector())
5505 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5506 FPVT.getVectorElementCount());
5507 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5508 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5509 return SDValue();
5510 SDLoc DL(Fp);
5511 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5512 DAG.getValueType(NewVT.getScalarType()));
5513 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5514}
5515
5516 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5517 SDValue N3, ISD::CondCode CC,
5518 SelectionDAG &DAG) {
5519 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5520 // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
5521 // be truncated versions of the setcc (N0/N1).
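// e.g. umin(fptoui(X), 255) can become an FP_TO_UINT_SAT to i8 that is then
// zero-extended back to the original result type, when the target reports the
// conversion as profitable via shouldConvertFpToSat.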
5522 if ((N0 != N2 &&
5523 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5524 N0.getOpcode() != ISD::FP_TO_UINT)
5525 return SDValue();
5526 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5527 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5528 if (!N1C || !N3C)
5529 return SDValue();
5530 const APInt &C1 = N1C->getAPIntValue();
5531 const APInt &C3 = N3C->getAPIntValue();
5532 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5533 C1 != C3.zext(C1.getBitWidth()))
5534 return SDValue();
5535
5536 unsigned BW = (C1 + 1).exactLogBase2();
5537 EVT FPVT = N0.getOperand(0).getValueType();
5538 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5539 if (FPVT.isVector())
5540 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5541 FPVT.getVectorElementCount());
5542 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5543 FPVT, NewVT))
5544 return SDValue();
5545
5546 SDValue Sat =
5547 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5548 DAG.getValueType(NewVT.getScalarType()));
5549 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5550}
5551
5552SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5553 SDValue N0 = N->getOperand(0);
5554 SDValue N1 = N->getOperand(1);
5555 EVT VT = N0.getValueType();
5556 unsigned Opcode = N->getOpcode();
5557 SDLoc DL(N);
5558
5559 // fold operation with constant operands.
5560 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5561 return C;
5562
5563 // If the operands are the same, this is a no-op.
5564 if (N0 == N1)
5565 return N0;
5566
5567 // canonicalize constant to RHS
5568 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5569 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5570 return DAG.getNode(Opcode, DL, VT, N1, N0);
5571
5572 // fold vector ops
5573 if (VT.isVector())
5574 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5575 return FoldedVOp;
5576
5577 // reassociate minmax
5578 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5579 return RMINMAX;
5580
5581 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5582 // Only do this if:
5583 // 1. The current op isn't legal and the flipped is.
5584 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5585 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5586 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5587 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5588 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5589 unsigned AltOpcode;
5590 switch (Opcode) {
5591 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5592 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5593 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5594 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5595 default: llvm_unreachable("Unknown MINMAX opcode");
5596 }
5597 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5598 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5599 }
5600
5601 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5602 if (SDValue S = PerformMinMaxFpToSatCombine(
5603 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5604 return S;
5605 if (Opcode == ISD::UMIN)
5606 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5607 return S;
5608
5609 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5610 auto ReductionOpcode = [](unsigned Opcode) {
5611 switch (Opcode) {
5612 case ISD::SMIN:
5613 return ISD::VECREDUCE_SMIN;
5614 case ISD::SMAX:
5615 return ISD::VECREDUCE_SMAX;
5616 case ISD::UMIN:
5617 return ISD::VECREDUCE_UMIN;
5618 case ISD::UMAX:
5619 return ISD::VECREDUCE_UMAX;
5620 default:
5621 llvm_unreachable("Unexpected opcode");
5622 }
5623 };
5624 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5625 SDLoc(N), VT, N0, N1))
5626 return SD;
5627
5628 // Simplify the operands using demanded-bits information.
5629 if (SimplifyDemandedBits(SDValue(N, 0)))
5630 return SDValue(N, 0);
5631
5632 return SDValue();
5633}
5634
5635/// If this is a bitwise logic instruction and both operands have the same
5636/// opcode, try to sink the other opcode after the logic instruction.
5637SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5638 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5639 EVT VT = N0.getValueType();
5640 unsigned LogicOpcode = N->getOpcode();
5641 unsigned HandOpcode = N0.getOpcode();
5642 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5643 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5644
5645 // Bail early if none of these transforms apply.
5646 if (N0.getNumOperands() == 0)
5647 return SDValue();
5648
5649 // FIXME: We should check number of uses of the operands to not increase
5650 // the instruction count for all transforms.
5651
5652 // Handle size-changing casts (or sign_extend_inreg).
5653 SDValue X = N0.getOperand(0);
5654 SDValue Y = N1.getOperand(0);
5655 EVT XVT = X.getValueType();
5656 SDLoc DL(N);
5657 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5658 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5659 N0.getOperand(1) == N1.getOperand(1))) {
5660 // If both operands have other uses, this transform would create extra
5661 // instructions without eliminating anything.
5662 if (!N0.hasOneUse() && !N1.hasOneUse())
5663 return SDValue();
5664 // We need matching integer source types.
5665 if (XVT != Y.getValueType())
5666 return SDValue();
5667 // Don't create an illegal op during or after legalization. Don't ever
5668 // create an unsupported vector op.
5669 if ((VT.isVector() || LegalOperations) &&
5670 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5671 return SDValue();
5672 // Avoid infinite looping with PromoteIntBinOp.
5673 // TODO: Should we apply desirable/legal constraints to all opcodes?
5674 if ((HandOpcode == ISD::ANY_EXTEND ||
5675 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5676 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5677 return SDValue();
5678 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5679 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5680 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5681 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5682 return DAG.getNode(HandOpcode, DL, VT, Logic);
5683 }
5684
5685 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5686 if (HandOpcode == ISD::TRUNCATE) {
5687 // If both operands have other uses, this transform would create extra
5688 // instructions without eliminating anything.
5689 if (!N0.hasOneUse() && !N1.hasOneUse())
5690 return SDValue();
5691 // We need matching source types.
5692 if (XVT != Y.getValueType())
5693 return SDValue();
5694 // Don't create an illegal op during or after legalization.
5695 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5696 return SDValue();
5697 // Be extra careful sinking truncate. If it's free, there's no benefit in
5698 // widening a binop. Also, don't create a logic op on an illegal type.
5699 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5700 return SDValue();
5701 if (!TLI.isTypeLegal(XVT))
5702 return SDValue();
5703 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5704 return DAG.getNode(HandOpcode, DL, VT, Logic);
5705 }
5706
5707 // For binops SHL/SRL/SRA/AND:
5708 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5709 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5710 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5711 N0.getOperand(1) == N1.getOperand(1)) {
5712 // If either operand has other uses, this transform is not an improvement.
5713 if (!N0.hasOneUse() || !N1.hasOneUse())
5714 return SDValue();
5715 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5716 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5717 }
5718
5719 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5720 if (HandOpcode == ISD::BSWAP) {
5721 // If either operand has other uses, this transform is not an improvement.
5722 if (!N0.hasOneUse() || !N1.hasOneUse())
5723 return SDValue();
5724 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5725 return DAG.getNode(HandOpcode, DL, VT, Logic);
5726 }
5727
5728 // For funnel shifts FSHL/FSHR:
5729 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5730 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5731 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5732 N0.getOperand(2) == N1.getOperand(2)) {
5733 if (!N0.hasOneUse() || !N1.hasOneUse())
5734 return SDValue();
5735 SDValue X1 = N0.getOperand(1);
5736 SDValue Y1 = N1.getOperand(1);
5737 SDValue S = N0.getOperand(2);
5738 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5739 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5740 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5741 }
5742
5743 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5744 // Only perform this optimization up until type legalization, before
5745 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5746 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5747 // we don't want to undo this promotion.
5748 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5749 // on scalars.
5750 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5751 Level <= AfterLegalizeTypes) {
5752 // Input types must be integer and the same.
5753 if (XVT.isInteger() && XVT == Y.getValueType() &&
5754 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5755 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5756 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5757 return DAG.getNode(HandOpcode, DL, VT, Logic);
5758 }
5759 }
5760
5761 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5762 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5763 // If both shuffles use the same mask, and both shuffle within a single
5764 // vector, then it is worthwhile to move the swizzle after the operation.
5765 // The type-legalizer generates this pattern when loading illegal
5766 // vector types from memory. In many cases this allows additional shuffle
5767 // optimizations.
5768 // There are other cases where moving the shuffle after the xor/and/or
5769 // is profitable even if shuffles don't perform a swizzle.
5770 // If both shuffles use the same mask, and both shuffles have the same first
5771 // or second operand, then it might still be profitable to move the shuffle
5772 // after the xor/and/or operation.
5773 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5774 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5775 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5776 assert(X.getValueType() == Y.getValueType() &&
5777 "Inputs to shuffles are not the same type");
5778
5779 // Check that both shuffles use the same mask. The masks are known to be of
5780 // the same length because the result vector type is the same.
5781 // Check also that shuffles have only one use to avoid introducing extra
5782 // instructions.
5783 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5784 !SVN0->getMask().equals(SVN1->getMask()))
5785 return SDValue();
5786
5787 // Don't try to fold this node if it requires introducing a
5788 // build vector of all zeros that might be illegal at this stage.
5789 SDValue ShOp = N0.getOperand(1);
5790 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5791 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5792
5793 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5794 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5795 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5796 N0.getOperand(0), N1.getOperand(0));
5797 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5798 }
5799
5800 // Don't try to fold this node if it requires introducing a
5801 // build vector of all zeros that might be illegal at this stage.
5802 ShOp = N0.getOperand(0);
5803 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5804 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5805
5806 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5807 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5808 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5809 N1.getOperand(1));
5810 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5811 }
5812 }
5813
5814 return SDValue();
5815}
5816
5817/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5818SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5819 const SDLoc &DL) {
5820 SDValue LL, LR, RL, RR, N0CC, N1CC;
5821 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5822 !isSetCCEquivalent(N1, RL, RR, N1CC))
5823 return SDValue();
5824
5825 assert(N0.getValueType() == N1.getValueType() &&
5826 "Unexpected operand types for bitwise logic op");
5827 assert(LL.getValueType() == LR.getValueType() &&
5828 RL.getValueType() == RR.getValueType() &&
5829 "Unexpected operand types for setcc");
5830
5831 // If we're here post-legalization or the logic op type is not i1, the logic
5832 // op type must match a setcc result type. Also, all folds require new
5833 // operations on the left and right operands, so those types must match.
5834 EVT VT = N0.getValueType();
5835 EVT OpVT = LL.getValueType();
5836 if (LegalOperations || VT.getScalarType() != MVT::i1)
5837 if (VT != getSetCCResultType(OpVT))
5838 return SDValue();
5839 if (OpVT != RL.getValueType())
5840 return SDValue();
5841
5842 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5843 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5844 bool IsInteger = OpVT.isInteger();
5845 if (LR == RR && CC0 == CC1 && IsInteger) {
5846 bool IsZero = isNullOrNullSplat(LR);
5847 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5848
5849 // All bits clear?
5850 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5851 // All sign bits clear?
5852 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5853 // Any bits set?
5854 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5855 // Any sign bits set?
5856 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5857
5858 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5859 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5860 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5861 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5862 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5863 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5864 AddToWorklist(Or.getNode());
5865 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5866 }
5867
5868 // All bits set?
5869 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5870 // All sign bits set?
5871 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5872 // Any bits clear?
5873 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5874 // Any sign bits clear?
5875 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5876
5877 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5878 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5879 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5880 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5881 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5882 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5883 AddToWorklist(And.getNode());
5884 return DAG.getSetCC(DL, VT, And, LR, CC1);
5885 }
5886 }
5887
5888 // TODO: What is the 'or' equivalent of this fold?
5889 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
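// (adding 1 maps {0, -1} to {1, 0}, exactly the unsigned values below 2)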
5890 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5891 IsInteger && CC0 == ISD::SETNE &&
5892 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5893 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5894 SDValue One = DAG.getConstant(1, DL, OpVT);
5895 SDValue Two = DAG.getConstant(2, DL, OpVT);
5896 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5897 AddToWorklist(Add.getNode());
5898 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5899 }
5900
5901 // Try more general transforms if the predicates match and the only user of
5902 // the compares is the 'and' or 'or'.
5903 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5904 N0.hasOneUse() && N1.hasOneUse()) {
5905 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5906 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5907 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5908 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5909 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5910 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5911 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5912 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5913 }
5914
5915 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5916 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5917 // Match a shared variable operand and 2 non-opaque constant operands.
5918 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5919 // The difference of the constants must be a single bit.
5920 const APInt &CMax =
5921 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5922 const APInt &CMin =
5923 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5924 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5925 };
5926 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
5927 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5928 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
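// e.g. (X != 4) & (X != 6) --> ((X - 4) & ~2) != 0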
5929 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5930 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5931 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5932 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5933 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5934 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5935 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5936 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5937 }
5938 }
5939 }
5940
5941 // Canonicalize equivalent operands to LL == RL.
5942 if (LL == RR && LR == RL) {
5943 CC1 = ISD::getSetCCSwappedOperands(CC1);
5944 std::swap(RL, RR);
5945 }
5946
5947 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5948 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5949 if (LL == RL && LR == RR) {
5950 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5951 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5952 if (NewCC != ISD::SETCC_INVALID &&
5953 (!LegalOperations ||
5954 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5955 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5956 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5957 }
5958
5959 return SDValue();
5960}
5961
5962static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
5963 SelectionDAG &DAG) {
5964 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
5965}
5966
5967static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
5968 SelectionDAG &DAG) {
5969 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
5970}
5971
5972static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
5973 ISD::CondCode CC, unsigned OrAndOpcode,
5974 SelectionDAG &DAG,
5975 bool isFMAXNUMFMINNUM_IEEE,
5976 bool isFMAXNUMFMINNUM) {
5977 // The optimization cannot be applied for all the predicates because
5978 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
5979 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
5980 // applied at all if one of the operands is a signaling NaN.
5981
5982 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
5983 // are non NaN values.
5984 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
5985 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
5986 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5987 isFMAXNUMFMINNUM_IEEE
5988 ? ISD::FMINNUM_IEEE
5989 : ISD::DELETED_NODE;
5990 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
5991 (OrAndOpcode == ISD::OR)) ||
5992 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
5993 (OrAndOpcode == ISD::AND)))
5994 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
5995 isFMAXNUMFMINNUM_IEEE
5996 ? ISD::FMAXNUM_IEEE
5997 : ISD::DELETED_NODE;
5998 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
5999 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6000 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6001 // that there are not any sNaNs, then the optimization is not valid
6002 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6003 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6004 // we can prove that we do not have any sNaNs, then we can do the
6005 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6006 // cases.
6007 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6008 (OrAndOpcode == ISD::OR)) ||
6009 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6010 (OrAndOpcode == ISD::AND)))
6011 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6012 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6013 isFMAXNUMFMINNUM_IEEE
6014 ? ISD::FMINNUM_IEEE
6015 : ISD::DELETED_NODE;
6016 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6017 (OrAndOpcode == ISD::OR)) ||
6018 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6019 (OrAndOpcode == ISD::AND)))
6020 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6021 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6022 isFMAXNUMFMINNUM_IEEE
6023 ? ISD::FMAXNUM_IEEE
6024 : ISD::DELETED_NODE;
6025 return ISD::DELETED_NODE;
6026}
6027
6028 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6029 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6030 assert(
6031 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6032 "Invalid Op to combine SETCC with");
6033
6034 // TODO: Search past casts/truncates.
6035 SDValue LHS = LogicOp->getOperand(0);
6036 SDValue RHS = LogicOp->getOperand(1);
6037 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6038 !LHS->hasOneUse() || !RHS->hasOneUse())
6039 return SDValue();
6040
6041 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6042 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6043 LogicOp, LHS.getNode(), RHS.getNode());
6044
6045 SDValue LHS0 = LHS->getOperand(0);
6046 SDValue RHS0 = RHS->getOperand(0);
6047 SDValue LHS1 = LHS->getOperand(1);
6048 SDValue RHS1 = RHS->getOperand(1);
6049 // TODO: We don't actually need a splat here, for vectors we just need the
6050 // invariants to hold for each element.
6051 auto *LHS1C = isConstOrConstSplat(LHS1);
6052 auto *RHS1C = isConstOrConstSplat(RHS1);
6053 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6054 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6055 EVT VT = LogicOp->getValueType(0);
6056 EVT OpVT = LHS0.getValueType();
6057 SDLoc DL(LogicOp);
6058
6059 // Check if the operands of an and/or operation are comparisons and if they
6060 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6061 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6062 // sequence will be replaced with min-cmp sequence:
6063 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6064 // and and-cmp-cmp will be replaced with max-cmp sequence:
6065 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6066 // The optimization does not work for `==` or `!=` .
6067 // The two comparisons should have either the same predicate or the
6068 // predicate of one of the comparisons is the opposite of the other one.
6069 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6070 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6071 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6072 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6073 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6074 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6075 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6076 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6077 (OpVT.isFloatingPoint() &&
6078 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6079 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6080 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6081 CCL != ISD::SETTRUE &&
6082 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6083
6084 SDValue CommonValue, Operand1, Operand2;
6085 ISD::CondCode CC = ISD::SETCC_INVALID;
6086 if (CCL == CCR) {
6087 if (LHS0 == RHS0) {
6088 CommonValue = LHS0;
6089 Operand1 = LHS1;
6090 Operand2 = RHS1;
6091 CC = ISD::getSetCCSwappedOperands(CCL);
6092 } else if (LHS1 == RHS1) {
6093 CommonValue = LHS1;
6094 Operand1 = LHS0;
6095 Operand2 = RHS0;
6096 CC = CCL;
6097 }
6098 } else {
6099 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6100 if (LHS0 == RHS1) {
6101 CommonValue = LHS0;
6102 Operand1 = LHS1;
6103 Operand2 = RHS0;
6104 CC = CCR;
6105 } else if (RHS0 == LHS1) {
6106 CommonValue = LHS1;
6107 Operand1 = LHS0;
6108 Operand2 = RHS1;
6109 CC = CCL;
6110 }
6111 }
6112
6113 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6114 // handle it using OR/AND.
6115 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6116 CC = ISD::SETCC_INVALID;
6117 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6118 CC = ISD::SETCC_INVALID;
6119
6120 if (CC != ISD::SETCC_INVALID) {
6121 unsigned NewOpcode = ISD::DELETED_NODE;
6122 bool IsSigned = isSignedIntSetCC(CC);
6123 if (OpVT.isInteger()) {
6124 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6125 CC == ISD::SETLT || CC == ISD::SETULT);
6126 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6127 if (IsLess == IsOr)
6128 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6129 else
6130 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6131 } else if (OpVT.isFloatingPoint())
6132 NewOpcode =
6133 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6134 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6135
6136 if (NewOpcode != ISD::DELETED_NODE) {
6137 SDValue MinMaxValue =
6138 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6139 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6140 }
6141 }
6142 }
6143
6144 if (TargetPreference == AndOrSETCCFoldKind::None)
6145 return SDValue();
6146
6147 if (CCL == CCR &&
6148 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6149 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6150 const APInt &APLhs = LHS1C->getAPIntValue();
6151 const APInt &APRhs = RHS1C->getAPIntValue();
6152
6153 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6154 // case this is just a compare).
6155 if (APLhs == (-APRhs) &&
6156 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6157 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6158 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6159 // (icmp eq A, C) | (icmp eq A, -C)
6160 // -> (icmp eq Abs(A), C)
6161 // (icmp ne A, C) & (icmp ne A, -C)
6162 // -> (icmp ne Abs(A), C)
6163 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6164 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6165 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6166 } else if (TargetPreference &
6167 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6168
6169 // AndOrSETCCFoldKind::AddAnd:
6170 // A == C0 | A == C1
6171 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6172 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6173 // A != C0 & A != C1
6174 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6175 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6176
6177 // AndOrSETCCFoldKind::NotAnd:
6178 // A == C0 | A == C1
6179 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6180 // -> ~A & smin(C0, C1) == 0
6181 // A != C0 & A != C1
6182 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6183 // -> ~A & smin(C0, C1) != 0
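// e.g. with AddAnd: (A == 5 | A == 7) --> (((A - 5) & ~2) == 0)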
6184
6185 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6186 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6187 APInt Dif = MaxC - MinC;
6188 if (!Dif.isZero() && Dif.isPowerOf2()) {
6189 if (MaxC.isAllOnes() &&
6190 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6191 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6192 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6193 DAG.getConstant(MinC, DL, OpVT));
6194 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6195 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6196 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6197
6198 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6199 DAG.getConstant(-MinC, DL, OpVT));
6200 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6201 DAG.getConstant(~Dif, DL, OpVT));
6202 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6203 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6204 }
6205 }
6206 }
6207 }
6208
6209 return SDValue();
6210}
6211
6212// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6213// We canonicalize to the `select` form in the middle end, but the `and` form
6214 // gets better codegen on all tested targets (arm, x86, riscv).
6215 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6216 const SDLoc &DL, SelectionDAG &DAG) {
6217 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6218 if (!isNullConstant(F))
6219 return SDValue();
6220
6221 EVT CondVT = Cond.getValueType();
6222 if (TLI.getBooleanContents(CondVT) !=
6223 TargetLowering::ZeroOrOneBooleanContent)
6224 return SDValue();
6225
6226 if (T.getOpcode() != ISD::AND)
6227 return SDValue();
6228
6229 if (!isOneConstant(T.getOperand(1)))
6230 return SDValue();
6231
6232 EVT OpVT = T.getValueType();
6233
6234 SDValue CondMask =
6235 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6236 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6237}
6238
6239/// This contains all DAGCombine rules which reduce two values combined by
6240/// an And operation to a single value. This makes them reusable in the context
6241/// of visitSELECT(). Rules involving constants are not included as
6242/// visitSELECT() already handles those cases.
6243SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6244 EVT VT = N1.getValueType();
6245 SDLoc DL(N);
6246
6247 // fold (and x, undef) -> 0
6248 if (N0.isUndef() || N1.isUndef())
6249 return DAG.getConstant(0, DL, VT);
6250
6251 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6252 return V;
6253
6254 // Canonicalize:
6255 // and(x, add) -> and(add, x)
6256 if (N1.getOpcode() == ISD::ADD)
6257 std::swap(N0, N1);
6258
6259 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6260 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6261 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6262 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6263 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6264 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6265 // immediate for an add, but it is legal if its top c2 bits are set,
6266 // transform the ADD so the immediate doesn't need to be materialized
6267 // in a register.
6268 APInt ADDC = ADDI->getAPIntValue();
6269 APInt SRLC = SRLI->getAPIntValue();
6270 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6271 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6272 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6273 SRLC.getZExtValue());
6274 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6275 ADDC |= Mask;
6276 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6277 SDLoc DL0(N0);
6278 SDValue NewAdd =
6279 DAG.getNode(ISD::ADD, DL0, VT,
6280 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6281 CombineTo(N0.getNode(), NewAdd);
6282 // Return N so it doesn't get rechecked!
6283 return SDValue(N, 0);
6284 }
6285 }
6286 }
6287 }
6288 }
6289 }
6290
6291 return SDValue();
6292}
6293
6294bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6295 EVT LoadResultTy, EVT &ExtVT) {
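// Typical case: (and (load i32 p), 65535) can be turned into a narrower
// (zextload i16 p) when the target supports that extending load.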
6296 if (!AndC->getAPIntValue().isMask())
6297 return false;
6298
6299 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6300
6301 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6302 EVT LoadedVT = LoadN->getMemoryVT();
6303
6304 if (ExtVT == LoadedVT &&
6305 (!LegalOperations ||
6306 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6307 // ZEXTLOAD will match without needing to change the size of the value being
6308 // loaded.
6309 return true;
6310 }
6311
6312 // Do not change the width of volatile or atomic loads.
6313 if (!LoadN->isSimple())
6314 return false;
6315
6316 // Do not generate loads of non-round integer types since these can
6317 // be expensive (and would be wrong if the type is not byte sized).
6318 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6319 return false;
6320
6321 if (LegalOperations &&
6322 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6323 return false;
6324
6325 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6326 return false;
6327
6328 return true;
6329}
6330
6331bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6332 ISD::LoadExtType ExtType, EVT &MemVT,
6333 unsigned ShAmt) {
6334 if (!LDST)
6335 return false;
6336 // Only allow byte offsets.
6337 if (ShAmt % 8)
6338 return false;
6339
6340 // Do not generate loads of non-round integer types since these can
6341 // be expensive (and would be wrong if the type is not byte sized).
6342 if (!MemVT.isRound())
6343 return false;
6344
6346 // Don't change the width of volatile or atomic loads.
6346 if (!LDST->isSimple())
6347 return false;
6348
6349 EVT LdStMemVT = LDST->getMemoryVT();
6350
6351 // Bail out when changing the scalable property, since we can't be sure that
6352 // we're actually narrowing here.
6353 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6354 return false;
6355
6356 // Verify that we are actually reducing a load width here.
6357 if (LdStMemVT.bitsLT(MemVT))
6358 return false;
6359
6360 // Ensure that this isn't going to produce an unsupported memory access.
6361 if (ShAmt) {
6362 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6363 const unsigned ByteShAmt = ShAmt / 8;
6364 const Align LDSTAlign = LDST->getAlign();
6365 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6366 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6367 LDST->getAddressSpace(), NarrowAlign,
6368 LDST->getMemOperand()->getFlags()))
6369 return false;
6370 }
6371
6372 // It's not possible to generate a constant of extended or untyped type.
6373 EVT PtrType = LDST->getBasePtr().getValueType();
6374 if (PtrType == MVT::Untyped || PtrType.isExtended())
6375 return false;
6376
6377 if (isa<LoadSDNode>(LDST)) {
6378 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6379 // Don't transform one with multiple uses, this would require adding a new
6380 // load.
6381 if (!SDValue(Load, 0).hasOneUse())
6382 return false;
6383
6384 if (LegalOperations &&
6385 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6386 return false;
6387
6388 // For the transform to be legal, the load must produce only two values
6389 // (the value loaded and the chain). Don't transform a pre-increment
6390 // load, for example, which produces an extra value. Otherwise the
6391 // transformation is not equivalent, and the downstream logic to replace
6392 // uses gets things wrong.
6393 if (Load->getNumValues() > 2)
6394 return false;
6395
6396 // If the load that we're shrinking is an extload and we're not just
6397 // discarding the extension we can't simply shrink the load. Bail.
6398 // TODO: It would be possible to merge the extensions in some cases.
6399 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6400 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6401 return false;
6402
6403 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6404 return false;
6405 } else {
6406 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6407 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6408 // Can't write outside the original store
6409 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6410 return false;
6411
6412 if (LegalOperations &&
6413 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6414 return false;
6415 }
6416 return true;
6417}
6418
6419bool DAGCombiner::SearchForAndLoads(SDNode *N,
6420 SmallVectorImpl<LoadSDNode*> &Loads,
6421 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6422 ConstantSDNode *Mask,
6423 SDNode *&NodeToMask) {
6424 // Recursively search for the operands, looking for loads which can be
6425 // narrowed.
6426 for (SDValue Op : N->op_values()) {
6427 if (Op.getValueType().isVector())
6428 return false;
6429
6430 // Some constants may need fixing up later if they are too large.
6431 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6432 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6433 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6434 NodesWithConsts.insert(N);
6435 continue;
6436 }
6437
6438 if (!Op.hasOneUse())
6439 return false;
6440
6441 switch(Op.getOpcode()) {
6442 case ISD::LOAD: {
6443 auto *Load = cast<LoadSDNode>(Op);
6444 EVT ExtVT;
6445 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6446 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6447
6448 // ZEXTLOAD is already small enough.
6449 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6450 ExtVT.bitsGE(Load->getMemoryVT()))
6451 continue;
6452
6453 // Use LE to convert equal sized loads to zext.
6454 if (ExtVT.bitsLE(Load->getMemoryVT()))
6455 Loads.push_back(Load);
6456
6457 continue;
6458 }
6459 return false;
6460 }
6461 case ISD::ZERO_EXTEND:
6462 case ISD::AssertZext: {
6463 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6464 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6465 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6466 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6467 Op.getOperand(0).getValueType();
6468
6469 // We can accept extending nodes if the mask is wider than, or equal in
6470 // width to, the original type.
6471 if (ExtVT.bitsGE(VT))
6472 continue;
6473 break;
6474 }
6475 case ISD::OR:
6476 case ISD::XOR:
6477 case ISD::AND:
6478 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6479 NodeToMask))
6480 return false;
6481 continue;
6482 }
6483
6484 // Allow one node which will be masked along with any loads found.
6485 if (NodeToMask)
6486 return false;
6487
6488 // Also ensure that the node to be masked only produces one data result.
6489 NodeToMask = Op.getNode();
6490 if (NodeToMask->getNumValues() > 1) {
6491 bool HasValue = false;
6492 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6493 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6494 if (VT != MVT::Glue && VT != MVT::Other) {
6495 if (HasValue) {
6496 NodeToMask = nullptr;
6497 return false;
6498 }
6499 HasValue = true;
6500 }
6501 }
6502 assert(HasValue && "Node to be masked has no data result?");
6503 }
6504 }
6505 return true;
6506}
6507
6508bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6509 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6510 if (!Mask)
6511 return false;
6512
6513 if (!Mask->getAPIntValue().isMask())
6514 return false;
6515
6516 // No need to do anything if the and directly uses a load.
6517 if (isa<LoadSDNode>(N->getOperand(0)))
6518 return false;
6519
6520 SmallVector<LoadSDNode*, 8> Loads;
6521 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6522 SDNode *FixupNode = nullptr;
6523 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6524 if (Loads.empty())
6525 return false;
6526
6527 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6528 SDValue MaskOp = N->getOperand(1);
6529
6530 // If it exists, fixup the single node we allow in the tree that needs
6531 // masking.
6532 if (FixupNode) {
6533 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6534 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6535 FixupNode->getValueType(0),
6536 SDValue(FixupNode, 0), MaskOp);
6537 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6538 if (And.getOpcode() == ISD::AND)
6539 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6540 }
6541
6542 // Narrow any constants that need it.
6543 for (auto *LogicN : NodesWithConsts) {
6544 SDValue Op0 = LogicN->getOperand(0);
6545 SDValue Op1 = LogicN->getOperand(1);
6546
6547 if (isa<ConstantSDNode>(Op0))
6548 Op0 =
6549 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6550
6551 if (isa<ConstantSDNode>(Op1))
6552 Op1 =
6553 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6554
6555 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6556 std::swap(Op0, Op1);
6557
6558 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6559 }
6560
6561 // Create narrow loads.
6562 for (auto *Load : Loads) {
6563 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6564 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6565 SDValue(Load, 0), MaskOp);
6566 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6567 if (And.getOpcode() == ISD::AND)
6568 And = SDValue(
6569 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6570 SDValue NewLoad = reduceLoadWidth(And.getNode());
6571 assert(NewLoad &&
6572 "Shouldn't be masking the load if it can't be narrowed");
6573 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6574 }
6575 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6576 return true;
6577 }
6578 return false;
6579}
6580
6581// Unfold
6582// x & (-1 'logical shift' y)
6583// To
6584// (x 'opposite logical shift' y) 'logical shift' y
6585// if it is better for performance.
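// E.g. (and x, (shl -1, y)) --> (shl (srl x, y), y): both clear the low y
// bits of x. Likewise (and x, (srl -1, y)) --> (srl (shl x, y), y) clears
// the high y bits.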
6586SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6587 assert(N->getOpcode() == ISD::AND);
6588
6589 SDValue N0 = N->getOperand(0);
6590 SDValue N1 = N->getOperand(1);
6591
6592 // Do we actually prefer shifts over mask?
6593 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6594 return SDValue();
6595
6596 // Try to match (-1 '[outer] logical shift' y)
6597 unsigned OuterShift;
6598 unsigned InnerShift; // The opposite direction to the OuterShift.
6599 SDValue Y; // Shift amount.
6600 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6601 if (!M.hasOneUse())
6602 return false;
6603 OuterShift = M->getOpcode();
6604 if (OuterShift == ISD::SHL)
6605 InnerShift = ISD::SRL;
6606 else if (OuterShift == ISD::SRL)
6607 InnerShift = ISD::SHL;
6608 else
6609 return false;
6610 if (!isAllOnesConstant(M->getOperand(0)))
6611 return false;
6612 Y = M->getOperand(1);
6613 return true;
6614 };
6615
6616 SDValue X;
6617 if (matchMask(N1))
6618 X = N0;
6619 else if (matchMask(N0))
6620 X = N1;
6621 else
6622 return SDValue();
6623
6624 SDLoc DL(N);
6625 EVT VT = N->getValueType(0);
6626
6627 // tmp = x 'opposite logical shift' y
6628 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6629 // ret = tmp 'logical shift' y
6630 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6631
6632 return T1;
6633}
6634
6635/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6636/// For a target with a bit test, this is expected to become test + set and save
6637/// at least 1 instruction.
6638static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6639 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6640
6641 // Look through an optional extension.
6642 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6643 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6644 And0 = And0.getOperand(0);
6645 if (!isOneConstant(And1) || !And0.hasOneUse())
6646 return SDValue();
6647
6648 SDValue Src = And0;
6649
6650 // Attempt to find a 'not' op.
6651 // TODO: Should we favor test+set even without the 'not' op?
6652 bool FoundNot = false;
6653 if (isBitwiseNot(Src)) {
6654 FoundNot = true;
6655 Src = Src.getOperand(0);
6656
6657 // Look through an optional truncation. The source operand may not be the
6658 // same type as the original 'and', but that is ok because we are masking
6659 // off everything but the low bit.
6660 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6661 Src = Src.getOperand(0);
6662 }
6663
6664 // Match a shift-right by constant.
6665 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6666 return SDValue();
6667
6668 // This is probably not worthwhile without a supported type.
6669 EVT SrcVT = Src.getValueType();
6670 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6671 if (!TLI.isTypeLegal(SrcVT))
6672 return SDValue();
6673
6674 // We might have looked through casts that make this transform invalid.
6675 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6676 SDValue ShiftAmt = Src.getOperand(1);
6677 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6678 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6679 return SDValue();
6680
6681 // Set source to shift source.
6682 Src = Src.getOperand(0);
6683
6684 // Try again to find a 'not' op.
6685 // TODO: Should we favor test+set even with two 'not' ops?
6686 if (!FoundNot) {
6687 if (!isBitwiseNot(Src))
6688 return SDValue();
6689 Src = Src.getOperand(0);
6690 }
6691
6692 if (!TLI.hasBitTest(Src, ShiftAmt))
6693 return SDValue();
6694
6695 // Turn this into a bit-test pattern using mask op + setcc:
6696 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6697 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6698 SDLoc DL(And);
6699 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6700 EVT CCVT =
6701 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6702 SDValue Mask = DAG.getConstant(
6703 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6704 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6705 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6706 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6707 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6708}
6709
6710/// For targets that support usubsat, match a bit-hack form of that operation
6711/// that ends in 'and' and convert it.
6712static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6713 EVT VT = N->getValueType(0);
6714 unsigned BitWidth = VT.getScalarSizeInBits();
6715 APInt SignMask = APInt::getSignMask(BitWidth);
6716
6717 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6718 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6719 // xor/add with SMIN (signmask) are logically equivalent.
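// For i8: if X < 128 then (X s>> 7) is 0 and the AND gives 0; if X >= 128
// then (X s>> 7) is -1 and the AND gives X ^ 128 == X - 128. Either way the
// result equals usubsat(X, 128).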
6720 SDValue X;
6721 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6722 m_OneUse(m_Sra(m_Deferred(X),
6723 m_SpecificInt(BitWidth - 1))))) &&
6724 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6725 m_OneUse(m_Sra(m_Deferred(X),
6726 m_SpecificInt(BitWidth - 1))))))
6727 return SDValue();
6728
6729 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6730 DAG.getConstant(SignMask, DL, VT));
6731}
6732
6733/// Given a bitwise logic operation N with a matching bitwise logic operand,
6734/// fold a pattern where 2 of the source operands are identically shifted
6735/// values. For example:
6736/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6737static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6738 SelectionDAG &DAG) {
6739 unsigned LogicOpcode = N->getOpcode();
6740 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6741 "Expected bitwise logic operation");
6742
6743 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6744 return SDValue();
6745
6746 // Match another bitwise logic op and a shift.
6747 unsigned ShiftOpcode = ShiftOp.getOpcode();
6748 if (LogicOp.getOpcode() != LogicOpcode ||
6749 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6750 ShiftOpcode == ISD::SRA))
6751 return SDValue();
6752
6753 // Match another shift op inside the first logic operand. Handle both commuted
6754 // possibilities.
6755 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6756 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6757 SDValue X1 = ShiftOp.getOperand(0);
6758 SDValue Y = ShiftOp.getOperand(1);
6759 SDValue X0, Z;
6760 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6761 LogicOp.getOperand(0).getOperand(1) == Y) {
6762 X0 = LogicOp.getOperand(0).getOperand(0);
6763 Z = LogicOp.getOperand(1);
6764 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6765 LogicOp.getOperand(1).getOperand(1) == Y) {
6766 X0 = LogicOp.getOperand(1).getOperand(0);
6767 Z = LogicOp.getOperand(0);
6768 } else {
6769 return SDValue();
6770 }
6771
6772 EVT VT = N->getValueType(0);
6773 SDLoc DL(N);
6774 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6775 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6776 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6777}
6778
6779/// Given a tree of logic operations with shape like
6780/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6781/// try to match and fold shift operations with the same shift amount.
6782/// For example:
6783/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6784/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6785static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6786 SDValue RightHand, SelectionDAG &DAG) {
6787 unsigned LogicOpcode = N->getOpcode();
6788 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6789 "Expected bitwise logic operation");
6790 if (LeftHand.getOpcode() != LogicOpcode ||
6791 RightHand.getOpcode() != LogicOpcode)
6792 return SDValue();
6793 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6794 return SDValue();
6795
6796 // Try to match one of following patterns:
6797 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6798 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6799 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6800 // itself.
6801 SDValue CombinedShifts, W;
6802 SDValue R0 = RightHand.getOperand(0);
6803 SDValue R1 = RightHand.getOperand(1);
6804 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6805 W = R1;
6806 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6807 W = R0;
6808 else
6809 return SDValue();
6810
6811 EVT VT = N->getValueType(0);
6812 SDLoc DL(N);
6813 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6814}
6815
6816SDValue DAGCombiner::visitAND(SDNode *N) {
6817 SDValue N0 = N->getOperand(0);
6818 SDValue N1 = N->getOperand(1);
6819 EVT VT = N1.getValueType();
6820 SDLoc DL(N);
6821
6822 // x & x --> x
6823 if (N0 == N1)
6824 return N0;
6825
6826 // fold (and c1, c2) -> c1&c2
6827 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6828 return C;
6829
6830 // canonicalize constant to RHS
6831 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6832 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6833 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6834
6835 if (areBitwiseNotOfEachother(N0, N1))
6836 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6837
6838 // fold vector ops
6839 if (VT.isVector()) {
6840 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6841 return FoldedVOp;
6842
6843 // fold (and x, 0) -> 0, vector edition
6844 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6845 // do not return N1, because undef node may exist in N1
6846 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6847 N1.getValueType());
6848
6849 // fold (and x, -1) -> x, vector edition
6850 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6851 return N0;
6852
6853 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6854 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6855 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6856 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6857 N1.hasOneUse()) {
6858 EVT LoadVT = MLoad->getMemoryVT();
6859 EVT ExtVT = VT;
6860 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6861 // For this AND to be a zero extension of the masked load the elements
6862 // of the BuildVec must mask the bottom bits of the extended element
6863 // type
6864 uint64_t ElementSize =
6865 LoadVT.getVectorElementType().getScalarSizeInBits();
6866 if (Splat->getAPIntValue().isMask(ElementSize)) {
6867 SDValue NewLoad = DAG.getMaskedLoad(
6868 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6869 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6870 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6871 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6872 bool LoadHasOtherUsers = !N0.hasOneUse();
6873 CombineTo(N, NewLoad);
6874 if (LoadHasOtherUsers)
6875 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6876 return SDValue(N, 0);
6877 }
6878 }
6879 }
6880 }
6881
6882 // fold (and x, -1) -> x
6883 if (isAllOnesConstant(N1))
6884 return N0;
6885
6886 // if (and x, c) is known to be zero, return 0
6887 unsigned BitWidth = VT.getScalarSizeInBits();
6888 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6889 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6890 return DAG.getConstant(0, DL, VT);
6891
6892 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6893 return R;
6894
6895 if (SDValue NewSel = foldBinOpIntoSelect(N))
6896 return NewSel;
6897
6898 // reassociate and
6899 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
6900 return RAND;
6901
6902 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
6903 if (SDValue SD =
6904 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
6905 return SD;
6906
6907 // fold (and (or x, C), D) -> D if (C & D) == D
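// E.g. (and (or x, 0xF0), 0x30) --> 0x30, because the OR guarantees that
// every bit of 0x30 is already set.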
6908 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6909 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6910 };
6911 if (N0.getOpcode() == ISD::OR &&
6912 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6913 return N1;
6914
6915 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6916 SDValue N0Op0 = N0.getOperand(0);
6917 EVT SrcVT = N0Op0.getValueType();
6918 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
6919 APInt Mask = ~N1C->getAPIntValue();
6920 Mask = Mask.trunc(SrcBitWidth);
6921
6922 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6923 if (DAG.MaskedValueIsZero(N0Op0, Mask))
6924 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
6925
6926 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
6927 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
6928 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
6929 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
6930 TLI.isNarrowingProfitable(VT, SrcVT))
6931 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
6932 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
6933 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
6934 }
6935
6936 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
6937 if (ISD::isExtOpcode(N0.getOpcode())) {
6938 unsigned ExtOpc = N0.getOpcode();
6939 SDValue N0Op0 = N0.getOperand(0);
6940 if (N0Op0.getOpcode() == ISD::AND &&
6941 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
6942 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
6943 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
6944 N0->hasOneUse() && N0Op0->hasOneUse()) {
6945 SDValue NewMask =
6946 DAG.getNode(ISD::AND, DL, VT, N1,
6947 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
6948 return DAG.getNode(ISD::AND, DL, VT,
6949 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
6950 NewMask);
6951 }
6952 }
6953
6954 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6955 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6956 // already be zero by virtue of the width of the base type of the load.
6957 //
6958 // the 'X' node here can either be nothing or an extract_vector_elt to catch
6959 // more cases.
6960 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6962 N0.getOperand(0).getOpcode() == ISD::LOAD &&
6963 N0.getOperand(0).getResNo() == 0) ||
6964 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6965 auto *Load =
6966 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
6967
6968 // Get the constant (if applicable) the zero'th operand is being ANDed with.
6969 // This can be a pure constant or a vector splat, in which case we treat the
6970 // vector as a scalar and use the splat value.
6971 APInt Constant = APInt::getZero(1);
6972 if (const ConstantSDNode *C = isConstOrConstSplat(
6973 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
6974 Constant = C->getAPIntValue();
6975 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6976 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6977 APInt SplatValue, SplatUndef;
6978 unsigned SplatBitSize;
6979 bool HasAnyUndefs;
6980 // Endianness should not matter here. Code below makes sure that we only
6981 // use the result if the SplatBitSize is a multiple of the vector element
6982 // size. And after that we AND all element sized parts of the splat
6983 // together. So the end result should be the same regardless of in which
6984 // order we do those operations.
6985 const bool IsBigEndian = false;
6986 bool IsSplat =
6987 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
6988 HasAnyUndefs, EltBitWidth, IsBigEndian);
6989
6990 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
6991 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
6992 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
6993 // Undef bits can contribute to a possible optimisation if set, so
6994 // set them.
6995 SplatValue |= SplatUndef;
6996
6997 // The splat value may be something like "0x00FFFFFF", which means 0 for
6998 // the first vector value and FF for the rest, repeating. We need a mask
6999 // that will apply equally to all members of the vector, so AND all the
7000 // lanes of the constant together.
7001 Constant = APInt::getAllOnes(EltBitWidth);
7002 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7003 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7004 }
7005 }
7006
7007 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7008 // actually legal and isn't going to get expanded, else this is a false
7009 // optimisation.
7010 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7011 Load->getValueType(0),
7012 Load->getMemoryVT());
7013
7014 // Resize the constant to the same size as the original memory access before
7015 // extension. If it is still the AllOnesValue then this AND is completely
7016 // unneeded.
7017 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7018
7019 bool B;
7020 switch (Load->getExtensionType()) {
7021 default: B = false; break;
7022 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7023 case ISD::ZEXTLOAD:
7024 case ISD::NON_EXTLOAD: B = true; break;
7025 }
7026
7027 if (B && Constant.isAllOnes()) {
7028 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7029 // preserve semantics once we get rid of the AND.
7030 SDValue NewLoad(Load, 0);
7031
7032 // Fold the AND away. NewLoad may get replaced immediately.
7033 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7034
7035 if (Load->getExtensionType() == ISD::EXTLOAD) {
7036 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7037 Load->getValueType(0), SDLoc(Load),
7038 Load->getChain(), Load->getBasePtr(),
7039 Load->getOffset(), Load->getMemoryVT(),
7040 Load->getMemOperand());
7041 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7042 if (Load->getNumValues() == 3) {
7043 // PRE/POST_INC loads have 3 values.
7044 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7045 NewLoad.getValue(2) };
7046 CombineTo(Load, To, 3, true);
7047 } else {
7048 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7049 }
7050 }
7051
7052 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7053 }
7054 }
7055
7056 // Try to convert a constant mask AND into a shuffle clear mask.
7057 if (VT.isVector())
7058 if (SDValue Shuffle = XformToShuffleWithZero(N))
7059 return Shuffle;
7060
7061 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7062 return Combined;
7063
7064 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7065 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7066 SDValue Ext = N0.getOperand(0);
7067 EVT ExtVT = Ext->getValueType(0);
7068 SDValue Extendee = Ext->getOperand(0);
7069
7070 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7071 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7072 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7073 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7074 // => (extract_subvector (iN_zeroext v))
7075 SDValue ZeroExtExtendee =
7076 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7077
7078 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7079 N0.getOperand(1));
7080 }
7081 }
7082
7083 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7084 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7085 EVT MemVT = GN0->getMemoryVT();
7086 EVT ScalarVT = MemVT.getScalarType();
7087
7088 if (SDValue(GN0, 0).hasOneUse() &&
7089 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7090 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
7091 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7092 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7093
7094 SDValue ZExtLoad = DAG.getMaskedGather(
7095 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7096 GN0->getIndexType(), ISD::ZEXTLOAD);
7097
7098 CombineTo(N, ZExtLoad);
7099 AddToWorklist(ZExtLoad.getNode());
7100 // Avoid recheck of N.
7101 return SDValue(N, 0);
7102 }
7103 }
7104
7105 // fold (and (load x), 255) -> (zextload x, i8)
7106 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7107 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7108 if (SDValue Res = reduceLoadWidth(N))
7109 return Res;
7110
7111 if (LegalTypes) {
7112 // Attempt to propagate the AND back up to the leaves which, if they're
7113 // loads, can be combined to narrow loads and the AND node can be removed.
7114 // Perform after legalization so that extend nodes will already be
7115 // combined into the loads.
7116 if (BackwardsPropagateMask(N))
7117 return SDValue(N, 0);
7118 }
7119
7120 if (SDValue Combined = visitANDLike(N0, N1, N))
7121 return Combined;
7122
7123 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7124 if (N0.getOpcode() == N1.getOpcode())
7125 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7126 return V;
7127
7128 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7129 return R;
7130 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7131 return R;
7132
7133 // Masking the negated extension of a boolean is just the zero-extended
7134 // boolean:
7135 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7136 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7137 //
7138 // Note: the SimplifyDemandedBits fold below can make an information-losing
7139 // transform, and then we have no way to find this better fold.
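// For the zext case, (sub 0, zext(X)) is 0 or -1, so masking with 1 yields
// 0 or 1 == zext(X). For the sext case, (sub 0, sext(X)) is already 0 or 1.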
7140 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7141 if (isNullOrNullSplat(N0.getOperand(0))) {
7142 SDValue SubRHS = N0.getOperand(1);
7143 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7144 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7145 return SubRHS;
7146 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7147 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7148 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7149 }
7150 }
7151
7152 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7153 // fold (and (sra)) -> (and (srl)) when possible.
7154 if (SimplifyDemandedBits(SDValue(N, 0)))
7155 return SDValue(N, 0);
7156
7157 // fold (zext_inreg (extload x)) -> (zextload x)
7158 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7159 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7160 (ISD::isEXTLoad(N0.getNode()) ||
7161 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7162 auto *LN0 = cast<LoadSDNode>(N0);
7163 EVT MemVT = LN0->getMemoryVT();
7164 // If we zero all the possible extended bits, then we can turn this into
7165 // a zextload if we are running before legalize or the operation is legal.
7166 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7167 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7168 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7169 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7170 ((!LegalOperations && LN0->isSimple()) ||
7171 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7172 SDValue ExtLoad =
7173 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7174 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7175 AddToWorklist(N);
7176 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7177 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7178 }
7179 }
7180
7181 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7182 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7183 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7184 N0.getOperand(1), false))
7185 return BSwap;
7186 }
7187
7188 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7189 return Shifts;
7190
7191 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7192 return V;
7193
7194 // Recognize the following pattern:
7195 //
7196 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7197 //
7198 // where bitmask is a mask that clears the upper bits of AndVT. The
7199 // number of bits in bitmask must be a power of two.
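// For example, (and (sign_extend i8 %x to i32), 0xFF) keeps only the low 8
// bits of %x, which is exactly (zero_extend i8 %x to i32).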
7200 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7201 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7202 return false;
7203
7204 auto *C = dyn_cast<ConstantSDNode>(RHS);
7205 if (!C)
7206 return false;
7207
7208 if (!C->getAPIntValue().isMask(
7209 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7210 return false;
7211
7212 return true;
7213 };
7214
7215 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7216 if (IsAndZeroExtMask(N0, N1))
7217 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7218
7219 if (hasOperation(ISD::USUBSAT, VT))
7220 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7221 return V;
7222
7223 // Postpone until legalization has completed to avoid interference with
7224 // bswap folding.
7225 if (LegalOperations || VT.isVector())
7226 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7227 return R;
7228
7229 return SDValue();
7230}
7231
7232/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
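/// For i32 a = 0xAABBCCDD, ((a & 0xFF) << 8) | ((a >> 8) & 0xFF) == 0xDDCC,
/// which equals (bswap a) >> 16 == 0xDDCCBBAA >> 16.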
7233SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7234 bool DemandHighBits) {
7235 if (!LegalOperations)
7236 return SDValue();
7237
7238 EVT VT = N->getValueType(0);
7239 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7240 return SDValue();
7241 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7242 return SDValue();
7243
7244 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7245 bool LookPassAnd0 = false;
7246 bool LookPassAnd1 = false;
7247 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7248 std::swap(N0, N1);
7249 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7250 std::swap(N0, N1);
7251 if (N0.getOpcode() == ISD::AND) {
7252 if (!N0->hasOneUse())
7253 return SDValue();
7254 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7255 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7256 // This is needed for X86.
7257 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7258 N01C->getZExtValue() != 0xFFFF))
7259 return SDValue();
7260 N0 = N0.getOperand(0);
7261 LookPassAnd0 = true;
7262 }
7263
7264 if (N1.getOpcode() == ISD::AND) {
7265 if (!N1->hasOneUse())
7266 return SDValue();
7267 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7268 if (!N11C || N11C->getZExtValue() != 0xFF)
7269 return SDValue();
7270 N1 = N1.getOperand(0);
7271 LookPassAnd1 = true;
7272 }
7273
7274 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7275 std::swap(N0, N1);
7276 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7277 return SDValue();
7278 if (!N0->hasOneUse() || !N1->hasOneUse())
7279 return SDValue();
7280
7281 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7282 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7283 if (!N01C || !N11C)
7284 return SDValue();
7285 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7286 return SDValue();
7287
7288 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7289 SDValue N00 = N0->getOperand(0);
7290 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7291 if (!N00->hasOneUse())
7292 return SDValue();
7293 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7294 if (!N001C || N001C->getZExtValue() != 0xFF)
7295 return SDValue();
7296 N00 = N00.getOperand(0);
7297 LookPassAnd0 = true;
7298 }
7299
7300 SDValue N10 = N1->getOperand(0);
7301 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7302 if (!N10->hasOneUse())
7303 return SDValue();
7304 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7305 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7306 // for X86.
7307 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7308 N101C->getZExtValue() != 0xFFFF))
7309 return SDValue();
7310 N10 = N10.getOperand(0);
7311 LookPassAnd1 = true;
7312 }
7313
7314 if (N00 != N10)
7315 return SDValue();
7316
7317 // Make sure everything beyond the low halfword gets set to zero since the SRL
7318 // 16 will clear the top bits.
7319 unsigned OpSizeInBits = VT.getSizeInBits();
7320 if (OpSizeInBits > 16) {
7321 // If the left-shift isn't masked out then the only way this is a bswap is
7322 // if all bits beyond the low 8 are 0. In that case the entire pattern
7323 // reduces to a left shift anyway: leave it for other parts of the combiner.
7324 if (DemandHighBits && !LookPassAnd0)
7325 return SDValue();
7326
7327 // However, if the right shift isn't masked out then it might be because
7328 // it's not needed. See if we can spot that too. If the high bits aren't
7329 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7330 // upper bits to be zero.
7331 if (!LookPassAnd1) {
7332 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7333 if (!DAG.MaskedValueIsZero(N10,
7334 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7335 return SDValue();
7336 }
7337 }
7338
7339 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7340 if (OpSizeInBits > 16) {
7341 SDLoc DL(N);
7342 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7343 DAG.getConstant(OpSizeInBits - 16, DL,
7344 getShiftAmountTy(VT)));
7345 }
7346 return Res;
7347}
7348
7349/// Return true if the specified node is an element that makes up a 32-bit
7350/// packed halfword byteswap.
7351/// ((x & 0x000000ff) << 8) |
7352/// ((x & 0x0000ff00) >> 8) |
7353/// ((x & 0x00ff0000) << 8) |
7354/// ((x & 0xff000000) >> 8)
7355static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7356 if (!N->hasOneUse())
7357 return false;
7358
7359 unsigned Opc = N.getOpcode();
7360 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7361 return false;
7362
7363 SDValue N0 = N.getOperand(0);
7364 unsigned Opc0 = N0.getOpcode();
7365 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7366 return false;
7367
7368 ConstantSDNode *N1C = nullptr;
7369 // SHL or SRL: look upstream for AND mask operand
7370 if (Opc == ISD::AND)
7371 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7372 else if (Opc0 == ISD::AND)
7373 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7374 if (!N1C)
7375 return false;
7376
7377 unsigned MaskByteOffset;
7378 switch (N1C->getZExtValue()) {
7379 default:
7380 return false;
7381 case 0xFF: MaskByteOffset = 0; break;
7382 case 0xFF00: MaskByteOffset = 1; break;
7383 case 0xFFFF:
7384 // In case demanded bits didn't clear the bits that will be shifted out.
7385 // This is needed for X86.
7386 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7387 MaskByteOffset = 1;
7388 break;
7389 }
7390 return false;
7391 case 0xFF0000: MaskByteOffset = 2; break;
7392 case 0xFF000000: MaskByteOffset = 3; break;
7393 }
7394
7395 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7396 if (Opc == ISD::AND) {
7397 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7398 // (x >> 8) & 0xff
7399 // (x >> 8) & 0xff0000
7400 if (Opc0 != ISD::SRL)
7401 return false;
7402 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7403 if (!C || C->getZExtValue() != 8)
7404 return false;
7405 } else {
7406 // (x << 8) & 0xff00
7407 // (x << 8) & 0xff000000
7408 if (Opc0 != ISD::SHL)
7409 return false;
7410 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7411 if (!C || C->getZExtValue() != 8)
7412 return false;
7413 }
7414 } else if (Opc == ISD::SHL) {
7415 // (x & 0xff) << 8
7416 // (x & 0xff0000) << 8
7417 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7418 return false;
7419 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7420 if (!C || C->getZExtValue() != 8)
7421 return false;
7422 } else { // Opc == ISD::SRL
7423 // (x & 0xff00) >> 8
7424 // (x & 0xff000000) >> 8
7425 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7426 return false;
7427 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7428 if (!C || C->getZExtValue() != 8)
7429 return false;
7430 }
7431
7432 if (Parts[MaskByteOffset])
7433 return false;
7434
7435 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7436 return true;
7437}
7438
7439// Match 2 elements of a packed halfword bswap.
7440static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7441 if (N.getOpcode() == ISD::OR)
7442 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7443 isBSwapHWordElement(N.getOperand(1), Parts);
7444
7445 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7446 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7447 if (!C || C->getAPIntValue() != 16)
7448 return false;
7449 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7450 return true;
7451 }
7452
7453 return false;
7454}
7455
7456// Match this pattern:
7457// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7458// And rewrite this to:
7459// (rotr (bswap A), 16)
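// For example, with A = 0xAABBCCDD:
//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff) == 0xBBAADDCC,
//   and (rotr (bswap A), 16) == rotr(0xDDCCBBAA, 16) == 0xBBAADDCC.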
7460static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7461 SelectionDAG &DAG, SDNode *N, SDValue N0,
7462 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7463 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7464 "MatchBSwapHWordOrAndAnd: expecting i32");
7465 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7466 return SDValue();
7467 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7468 return SDValue();
7469 // TODO: this is too restrictive; lifting this restriction requires more tests
7470 if (!N0->hasOneUse() || !N1->hasOneUse())
7471 return SDValue();
7472 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7473 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7474 if (!Mask0 || !Mask1)
7475 return SDValue();
7476 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7477 Mask1->getAPIntValue() != 0x00ff00ff)
7478 return SDValue();
7479 SDValue Shift0 = N0.getOperand(0);
7480 SDValue Shift1 = N1.getOperand(0);
7481 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7482 return SDValue();
7483 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7484 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7485 if (!ShiftAmt0 || !ShiftAmt1)
7486 return SDValue();
7487 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7488 return SDValue();
7489 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7490 return SDValue();
7491
7492 SDLoc DL(N);
7493 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7494 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7495 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7496}
7497
7498/// Match a 32-bit packed halfword bswap. That is
7499/// ((x & 0x000000ff) << 8) |
7500/// ((x & 0x0000ff00) >> 8) |
7501/// ((x & 0x00ff0000) << 8) |
7502/// ((x & 0xff000000) >> 8)
7503/// => (rotl (bswap x), 16)
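/// E.g. for x = 0xAABBCCDD the pattern evaluates to 0xBBAADDCC, and
/// (rotl (bswap x), 16) == rotl(0xDDCCBBAA, 16) == 0xBBAADDCC.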
7504SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7505 if (!LegalOperations)
7506 return SDValue();
7507
7508 EVT VT = N->getValueType(0);
7509 if (VT != MVT::i32)
7510 return SDValue();
7511 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7512 return SDValue();
7513
7514 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7515 getShiftAmountTy(VT)))
7516 return BSwap;
7517
7518 // Try again with commuted operands.
7519 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7520 getShiftAmountTy(VT)))
7521 return BSwap;
7522
7523
7524 // Look for either
7525 // (or (bswaphpair), (bswaphpair))
7526 // (or (or (bswaphpair), (and)), (and))
7527 // (or (or (and), (bswaphpair)), (and))
7528 SDNode *Parts[4] = {};
7529
7530 if (isBSwapHWordPair(N0, Parts)) {
7531 // (or (or (and), (and)), (or (and), (and)))
7532 if (!isBSwapHWordPair(N1, Parts))
7533 return SDValue();
7534 } else if (N0.getOpcode() == ISD::OR) {
7535 // (or (or (or (and), (and)), (and)), (and))
7536 if (!isBSwapHWordElement(N1, Parts))
7537 return SDValue();
7538 SDValue N00 = N0.getOperand(0);
7539 SDValue N01 = N0.getOperand(1);
7540 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7541 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7542 return SDValue();
7543 } else {
7544 return SDValue();
7545 }
7546
7547 // Make sure the parts are all coming from the same node.
7548 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7549 return SDValue();
7550
7551 SDLoc DL(N);
7552 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7553 SDValue(Parts[0], 0));
7554
7555 // Result of the bswap should be rotated by 16. If it's not legal, then
7556 // do (x << 16) | (x >> 16).
7557 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7558 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7559 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7560 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7561 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7562 return DAG.getNode(ISD::OR, DL, VT,
7563 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7564 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7565}
7566
7567/// This contains all DAGCombine rules which reduce two values combined by
7568/// an Or operation to a single value \see visitANDLike().
7569SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
7570 EVT VT = N1.getValueType();
7571 SDLoc DL(N);
7572
7573 // fold (or x, undef) -> -1
7574 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7575 return DAG.getAllOnesConstant(DL, VT);
7576
7577 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7578 return V;
7579
7580 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7581 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7582 // Don't increase # computations.
7583 (N0->hasOneUse() || N1->hasOneUse())) {
7584 // We can only do this xform if we know that bits from X that are set in C2
7585 // but not in C1 are already zero. Likewise for Y.
7586 if (const ConstantSDNode *N0O1C =
7587 getAsNonOpaqueConstant(N0.getOperand(1))) {
7588 if (const ConstantSDNode *N1O1C =
7589 getAsNonOpaqueConstant(N1.getOperand(1))) {
7590 // We can only do this xform if we know that bits from X that are set in
7591 // C2 but not in C1 are already zero. Likewise for Y.
7592 const APInt &LHSMask = N0O1C->getAPIntValue();
7593 const APInt &RHSMask = N1O1C->getAPIntValue();
7594
7595 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7596 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7597 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7598 N0.getOperand(0), N1.getOperand(0));
7599 return DAG.getNode(ISD::AND, DL, VT, X,
7600 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7601 }
7602 }
7603 }
7604 }
7605
7606 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7607 if (N0.getOpcode() == ISD::AND &&
7608 N1.getOpcode() == ISD::AND &&
7609 N0.getOperand(0) == N1.getOperand(0) &&
7610 // Don't increase # computations.
7611 (N0->hasOneUse() || N1->hasOneUse())) {
7612 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7613 N0.getOperand(1), N1.getOperand(1));
7614 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7615 }
7616
7617 return SDValue();
7618}
7619
7620/// OR combines for which the commuted variant will be tried as well.
7621static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7622 SDNode *N) {
7623 EVT VT = N0.getValueType();
7624
7625 auto peekThroughResize = [](SDValue V) {
7626 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7627 return V->getOperand(0);
7628 return V;
7629 };
7630
7631 SDValue N0Resized = peekThroughResize(N0);
7632 if (N0Resized.getOpcode() == ISD::AND) {
7633 SDValue N1Resized = peekThroughResize(N1);
7634 SDValue N00 = N0Resized.getOperand(0);
7635 SDValue N01 = N0Resized.getOperand(1);
7636
7637 // fold or (and x, y), x --> x
7638 if (N00 == N1Resized || N01 == N1Resized)
7639 return N1;
7640
7641 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7642 // TODO: Set AllowUndefs = true.
7643 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7644 /* AllowUndefs */ false)) {
7645 if (peekThroughResize(NotOperand) == N1Resized)
7646 return DAG.getNode(ISD::OR, SDLoc(N), VT,
7647 DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
7648 }
7649
7650 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7651 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7652 /* AllowUndefs */ false)) {
7653 if (peekThroughResize(NotOperand) == N1Resized)
7654 return DAG.getNode(ISD::OR, SDLoc(N), VT,
7655 DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
7656 }
7657 }
7658
7659 if (N0.getOpcode() == ISD::XOR) {
7660 // fold or (xor x, y), x --> or x, y
7661 // or (xor x, y), (x and/or y) --> or x, y
7662 SDValue N00 = N0.getOperand(0);
7663 SDValue N01 = N0.getOperand(1);
7664 if (N00 == N1)
7665 return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
7666 if (N01 == N1)
7667 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
7668
7669 if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
7670 SDValue N10 = N1.getOperand(0);
7671 SDValue N11 = N1.getOperand(1);
7672 if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
7673 return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
7674 }
7675 }
7676
7677 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7678 return R;
7679
7680 auto peekThroughZext = [](SDValue V) {
7681 if (V->getOpcode() == ISD::ZERO_EXTEND)
7682 return V->getOperand(0);
7683 return V;
7684 };
7685
7686 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
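// fshl(X, ?, Y) computes (X << Y) | (? >> (BW - Y)), so OR-ing in (shl X, Y)
// adds no new bits; the fshr/srl case below is the mirror image.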
7687 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7688 N0.getOperand(0) == N1.getOperand(0) &&
7689 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7690 return N0;
7691
7692 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7693 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7694 N0.getOperand(1) == N1.getOperand(0) &&
7695 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7696 return N0;
7697
7698 return SDValue();
7699}
7700
7701SDValue DAGCombiner::visitOR(SDNode *N) {
7702 SDValue N0 = N->getOperand(0);
7703 SDValue N1 = N->getOperand(1);
7704 EVT VT = N1.getValueType();
7705
7706 // x | x --> x
7707 if (N0 == N1)
7708 return N0;
7709
7710 // fold (or c1, c2) -> c1|c2
7711 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
7712 return C;
7713
7714 // canonicalize constant to RHS
7715 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7716 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7717 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
7718
7719 // fold vector ops
7720 if (VT.isVector()) {
7721 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
7722 return FoldedVOp;
7723
7724 // fold (or x, 0) -> x, vector edition
7725 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7726 return N0;
7727
7728 // fold (or x, -1) -> -1, vector edition
7729 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7730 // do not return N1, because undef node may exist in N1
7731 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
7732
7733 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7734 // Do this only if the resulting type / shuffle is legal.
7735 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7736 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7737 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7738 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7739 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7740 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7741 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7742 // Ensure both shuffles have a zero input.
7743 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7744 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7745 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7746 bool CanFold = true;
7747 int NumElts = VT.getVectorNumElements();
7748 SmallVector<int, 4> Mask(NumElts, -1);
7749
7750 for (int i = 0; i != NumElts; ++i) {
7751 int M0 = SV0->getMaskElt(i);
7752 int M1 = SV1->getMaskElt(i);
7753
7754 // Determine if either index is pointing to a zero vector.
7755 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7756 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7757
7758 // If one element is zero and the other side is undef, keep undef.
7759 // This also handles the case that both are undef.
7760 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7761 continue;
7762
7763 // Make sure only one of the elements is zero.
7764 if (M0Zero == M1Zero) {
7765 CanFold = false;
7766 break;
7767 }
7768
7769 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7770
7771 // We have a zero and non-zero element. If the non-zero came from
7772 // SV0 make the index a LHS index. If it came from SV1, make it
7773 // a RHS index. We need to mod by NumElts because we don't care
7774 // which operand it came from in the original shuffles.
7775 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7776 }
7777
7778 if (CanFold) {
7779 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7780 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7781
7782 SDValue LegalShuffle =
7783 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
7784 Mask, DAG);
7785 if (LegalShuffle)
7786 return LegalShuffle;
7787 }
7788 }
7789 }
7790 }
7791
7792 // fold (or x, 0) -> x
7793 if (isNullConstant(N1))
7794 return N0;
7795
7796 // fold (or x, -1) -> -1
7797 if (isAllOnesConstant(N1))
7798 return N1;
7799
7800 if (SDValue NewSel = foldBinOpIntoSelect(N))
7801 return NewSel;
7802
7803 // fold (or x, c) -> c iff (x & ~c) == 0
7804 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7805 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7806 return N1;
7807
7808 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7809 return R;
7810
7811 if (SDValue Combined = visitORLike(N0, N1, N))
7812 return Combined;
7813
7814 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7815 return Combined;
7816
7817 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7818 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7819 return BSwap;
7820 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7821 return BSwap;
7822
7823 // reassociate or
7824 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
7825 return ROR;
7826
7827 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7828 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
7829 VT, N0, N1))
7830 return SD;
7831
7832 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7833 // iff (c1 & c2) != 0 or c1/c2 are undef.
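// E.g. (or (and X, 0x0F), 0x01) --> (and (or X, 0x01), 0x0F): both keep the
// low nibble of X and force bit 0 to 1.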
7834 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7835 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7836 };
7837 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7838 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7839 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7840 {N1, N0.getOperand(1)})) {
7841 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7842 AddToWorklist(IOR.getNode());
7843 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
7844 }
7845 }
7846
7847 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7848 return Combined;
7849 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7850 return Combined;
7851
7852 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7853 if (N0.getOpcode() == N1.getOpcode())
7854 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7855 return V;
7856
7857 // See if this is some rotate idiom.
7858 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
7859 return Rot;
7860
7861 if (SDValue Load = MatchLoadCombine(N))
7862 return Load;
7863
7864 // Simplify the operands using demanded-bits information.
7865 if (SimplifyDemandedBits(SDValue(N, 0)))
7866 return SDValue(N, 0);
7867
7868 // If OR can be rewritten into ADD, try combines based on ADD.
7869 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7870 DAG.isADDLike(SDValue(N, 0)))
7871 if (SDValue Combined = visitADDLike(N))
7872 return Combined;
7873
7874 // Postpone until legalization has completed to avoid interference with
7875 // bswap folding.
7876 if (LegalOperations || VT.isVector())
7877 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7878 return R;
7879
7880 return SDValue();
7881}
7882
7883static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7884 SDValue &Mask) {
7885 if (Op.getOpcode() == ISD::AND &&
7886 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
7887 Mask = Op.getOperand(1);
7888 return Op.getOperand(0);
7889 }
7890 return Op;
7891}
7892
7893/// Match "(X shl/srl V1) & V2" where V2 may not be present.
7894static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
7895 SDValue &Mask) {
7896 Op = stripConstantMask(DAG, Op, Mask);
7897 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
7898 Shift = Op;
7899 return true;
7900 }
7901 return false;
7902}
7903
7904/// Helper function for visitOR to extract the needed side of a rotate idiom
7905/// from a shl/srl/mul/udiv. This is meant to handle cases where
7906/// InstCombine merged some outside op with one of the shifts from
7907/// the rotate pattern.
7908/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7909/// Otherwise, returns an expansion of \p ExtractFrom based on the following
7910/// patterns:
7911///
7912/// (or (add v v) (shrl v bitwidth-1)):
7913/// expands (add v v) -> (shl v 1)
7914///
7915/// (or (mul v c0) (shrl (mul v c1) c2)):
7916/// expands (mul v c0) -> (shl (mul v c1) c3)
7917///
7918/// (or (udiv v c0) (shl (udiv v c1) c2)):
7919/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
7920///
7921/// (or (shl v c0) (shrl (shl v c1) c2)):
7922/// expands (shl v c0) -> (shl (shl v c1) c3)
7923///
7924/// (or (shrl v c0) (shl (shrl v c1) c2)):
7925/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
7926///
7927/// Such that in all cases, c3+c2==bitwidth(op v c1).
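/// For example (illustrative), with i8 v: (or (mul v 24) (srl (mul v 3) 5))
/// matches the mul pattern with c0=24, c1=3, c2=5, and (mul v 24) expands to
/// (shl (mul v 3) 3), since 24 == 3 << 3 and c3+c2 == 3+5 == 8 == bitwidth.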
7928 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7929 SDValue ExtractFrom, SDValue &Mask,
7930 const SDLoc &DL) {
7931 assert(OppShift && ExtractFrom && "Empty SDValue");
7932 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
7933 return SDValue();
7934
7935 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7936
7937 // Value and Type of the shift.
7938 SDValue OppShiftLHS = OppShift.getOperand(0);
7939 EVT ShiftedVT = OppShiftLHS.getValueType();
7940
7941 // Amount of the existing shift.
7942 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7943
7944 // (add v v) -> (shl v 1)
7945 // TODO: Should this be a general DAG canonicalization?
7946 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7947 ExtractFrom.getOpcode() == ISD::ADD &&
7948 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7949 ExtractFrom.getOperand(0) == OppShiftLHS &&
7950 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7951 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7952 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7953
7954 // Preconditions:
7955 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7956 //
7957 // Find opcode of the needed shift to be extracted from (op0 v c0).
7958 unsigned Opcode = ISD::DELETED_NODE;
7959 bool IsMulOrDiv = false;
7960 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7961 // opcode or its arithmetic (mul or udiv) variant.
7962 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7963 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7964 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7965 return false;
7966 Opcode = NeededShift;
7967 return true;
7968 };
7969 // op0 must be either the needed shift opcode or the mul/udiv equivalent
7970 // that the needed shift can be extracted from.
7971 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7972 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7973 return SDValue();
7974
7975 // op0 must be the same opcode on both sides, have the same LHS argument,
7976 // and produce the same value type.
7977 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7978 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7979 ShiftedVT != ExtractFrom.getValueType())
7980 return SDValue();
7981
7982 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7983 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
7984 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
7985 ConstantSDNode *ExtractFromCst =
7986 isConstOrConstSplat(ExtractFrom.getOperand(1));
7987 // TODO: We should be able to handle non-uniform constant vectors for these values
7988 // Check that we have constant values.
7989 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
7990 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
7991 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
7992 return SDValue();
7993
7994 // Compute the shift amount we need to extract to complete the rotate.
7995 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
7996 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
7997 return SDValue();
7998 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
7999 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8000 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8001 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8002 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8003
8004 // Now try extract the needed shift from the ExtractFrom op and see if the
8005 // result matches up with the existing shift's LHS op.
8006 if (IsMulOrDiv) {
8007 // Op to extract from is a mul or udiv by a constant.
8008 // Check:
8009 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8010 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8011 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8012 NeededShiftAmt.getZExtValue());
8013 APInt ResultAmt;
8014 APInt Rem;
8015 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8016 if (Rem != 0 || ResultAmt != OppLHSAmt)
8017 return SDValue();
8018 } else {
8019 // Op to extract from is a shift by a constant.
8020 // Check:
8021 // c2 - (bitwidth(op0 v c0) - c1) == c0
8022 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8023 ExtractFromAmt.getBitWidth()))
8024 return SDValue();
8025 }
8026
8027 // Return the expanded shift op that should allow a rotate to be formed.
8028 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8029 EVT ResVT = ExtractFrom.getValueType();
8030 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8031 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8032}
8033
8034// Return true if we can prove that, whenever Neg and Pos are both in the
8035// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8036// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8037//
8038// (or (shift1 X, Neg), (shift2 X, Pos))
8039//
8040// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8041// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8042// to consider shift amounts with defined behavior.
8043//
8044// The IsRotate flag should be set when the LHS of both shifts is the same.
8045// Otherwise if matching a general funnel shift, it should be clear.
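// For example, with EltSize == 32 and Neg == (and (sub 32, Pos), 31):
// Pos == 0 gives Neg == 32 & 31 == 0, and Pos == 5 gives Neg == 27, so
// (or (srl X, Pos), (shl X, Neg)) behaves as (rotr X, Pos) for every Pos in
// [0, 32) and the match succeeds.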
8046static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8047 SelectionDAG &DAG, bool IsRotate) {
8048 const auto &TLI = DAG.getTargetLoweringInfo();
8049 // If EltSize is a power of 2 then:
8050 //
8051 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8052 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8053 //
8054 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8055 // for the stronger condition:
8056 //
8057 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8058 //
8059 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8060 // we can just replace Neg with Neg' for the rest of the function.
8061 //
8062 // In other cases we check for the even stronger condition:
8063 //
8064 // Neg == EltSize - Pos [B]
8065 //
8066 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8067 // behavior if Pos == 0 (and consequently Neg == EltSize).
8068 //
8069 // We could actually use [A] whenever EltSize is a power of 2, but the
8070 // only extra cases that it would match are those uninteresting ones
8071 // where Neg and Pos are never in range at the same time. E.g. for
8072 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8073 // as well as (sub 32, Pos), but:
8074 //
8075 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8076 //
8077 // always invokes undefined behavior for 32-bit X.
8078 //
8079 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8080 // This allows us to peek through any operations that only affect Mask's
8081 // un-demanded bits.
8082 //
8083 // NOTE: We can only do this when matching operations which won't modify the
8084 // least Log2(EltSize) significant bits and not a general funnel shift.
8085 unsigned MaskLoBits = 0;
8086 if (IsRotate && isPowerOf2_64(EltSize)) {
8087 unsigned Bits = Log2_64(EltSize);
8088 unsigned NegBits = Neg.getScalarValueSizeInBits();
8089 if (NegBits >= Bits) {
8090 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8091 if (SDValue Inner =
8092 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8093 Neg = Inner;
8094 MaskLoBits = Bits;
8095 }
8096 }
8097 }
8098
8099 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8100 if (Neg.getOpcode() != ISD::SUB)
8101 return false;
8102 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8103 if (!NegC)
8104 return false;
8105 SDValue NegOp1 = Neg.getOperand(1);
8106
8107 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8108 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8109 // are redundant for the purpose of the equality.
8110 if (MaskLoBits) {
8111 unsigned PosBits = Pos.getScalarValueSizeInBits();
8112 if (PosBits >= MaskLoBits) {
8113 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8114 if (SDValue Inner =
8115 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG))
8116 Pos = Inner;
8117 }
8118 }
8119 }
8120
8121 // The condition we need is now:
8122 //
8123 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8124 //
8125 // If NegOp1 == Pos then we need:
8126 //
8127 // EltSize & Mask == NegC & Mask
8128 //
8129 // (because "x & Mask" is a truncation and distributes through subtraction).
8130 //
8131 // We also need to account for a potential truncation of NegOp1 if the amount
8132 // has already been legalized to a shift amount type.
8133 APInt Width;
8134 if ((Pos == NegOp1) ||
8135 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8136 Width = NegC->getAPIntValue();
8137
8138 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8139 // Then the condition we want to prove becomes:
8140 //
8141 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8142 //
8143 // which, again because "x & Mask" is a truncation, becomes:
8144 //
8145 // NegC & Mask == (EltSize - PosC) & Mask
8146 // EltSize & Mask == (NegC + PosC) & Mask
8147 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8148 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8149 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8150 else
8151 return false;
8152 } else
8153 return false;
8154
8155 // Now we just need to check that EltSize & Mask == Width & Mask.
8156 if (MaskLoBits)
8157 // EltSize & Mask is 0 since Mask is EltSize - 1.
8158 return Width.getLoBits(MaskLoBits) == 0;
8159 return Width == EltSize;
8160}
8161
8162// A subroutine of MatchRotate used once we have found an OR of two opposite
8163// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8164// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8165// former being preferred if supported. InnerPos and InnerNeg are Pos and
8166// Neg with outer conversions stripped away.
8167SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8168 SDValue Neg, SDValue InnerPos,
8169 SDValue InnerNeg, bool HasPos,
8170 unsigned PosOpcode, unsigned NegOpcode,
8171 const SDLoc &DL) {
8172 // fold (or (shl x, (*ext y)),
8173 // (srl x, (*ext (sub 32, y)))) ->
8174 // (rotl x, y) or (rotr x, (sub 32, y))
8175 //
8176 // fold (or (shl x, (*ext (sub 32, y))),
8177 // (srl x, (*ext y))) ->
8178 // (rotr x, y) or (rotl x, (sub 32, y))
8179 EVT VT = Shifted.getValueType();
8180 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8181 /*IsRotate*/ true)) {
8182 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8183 HasPos ? Pos : Neg);
8184 }
8185
8186 return SDValue();
8187}
8188
8189// A subroutine of MatchRotate used once we have found an OR of two opposite
8190// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8191// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8192// former being preferred if supported. InnerPos and InnerNeg are Pos and
8193// Neg with outer conversions stripped away.
8194// TODO: Merge with MatchRotatePosNeg.
8195SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8196 SDValue Neg, SDValue InnerPos,
8197 SDValue InnerNeg, bool HasPos,
8198 unsigned PosOpcode, unsigned NegOpcode,
8199 const SDLoc &DL) {
8200 EVT VT = N0.getValueType();
8201 unsigned EltBits = VT.getScalarSizeInBits();
8202
8203 // fold (or (shl x0, (*ext y)),
8204 // (srl x1, (*ext (sub 32, y)))) ->
8205 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8206 //
8207 // fold (or (shl x0, (*ext (sub 32, y))),
8208 // (srl x1, (*ext y))) ->
8209 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8210 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8211 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8212 HasPos ? Pos : Neg);
8213 }
8214
8215 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8216 // so for now just use the PosOpcode case if its legal.
8217 // TODO: When can we use the NegOpcode case?
8218 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8219 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8220 if (Op.getOpcode() != BinOpc)
8221 return false;
8222 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8223 return Cst && (Cst->getAPIntValue() == Imm);
8224 };
8225
8226 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8227 // -> (fshl x0, x1, y)
8228 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8229 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8230 InnerPos == InnerNeg.getOperand(0) &&
8231 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8232 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8233 }
8234
8235 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8236 // -> (fshr x0, x1, y)
8237 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8238 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8239 InnerNeg == InnerPos.getOperand(0) &&
8240 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8241 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8242 }
8243
8244 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8245 // -> (fshr x0, x1, y)
8246 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8247 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8248 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8249 InnerNeg == InnerPos.getOperand(0) &&
8250 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8251 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8252 }
8253 }
8254
8255 return SDValue();
8256}
8257
8258// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8259// idioms for rotate, and if the target supports rotation instructions, generate
8260// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8261// with different shifted sources.
8262SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8263 EVT VT = LHS.getValueType();
8264
8265 // The target must have at least one rotate/funnel flavor.
8266 // We still try to match rotate by constant pre-legalization.
8267 // TODO: Support pre-legalization funnel-shift by constant.
8268 bool HasROTL = hasOperation(ISD::ROTL, VT);
8269 bool HasROTR = hasOperation(ISD::ROTR, VT);
8270 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8271 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8272
8273 // If the type is going to be promoted and the target has enabled custom
8274 // lowering for rotate, allow matching rotate by non-constants. Only allow
8275 // this for scalar types.
8276 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8277 TargetLowering::TypePromoteInteger) {
8278 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8279 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8280 }
8281
8282 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8283 return SDValue();
8284
8285 // Check for truncated rotate.
8286 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8287 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8288 assert(LHS.getValueType() == RHS.getValueType());
8289 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8290 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8291 }
8292 }
8293
8294 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8295 SDValue LHSShift; // The shift.
8296 SDValue LHSMask; // AND value if any.
8297 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8298
8299 SDValue RHSShift; // The shift.
8300 SDValue RHSMask; // AND value if any.
8301 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8302
8303 // If neither side matched a rotate half, bail
8304 if (!LHSShift && !RHSShift)
8305 return SDValue();
8306
8307 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8308 // side of the rotate, so try to handle that here. In all cases we need to
8309 // pass the matched shift from the opposite side to compute the opcode and
8310 // needed shift amount to extract. We still want to do this if both sides
8311 // matched a rotate half because one half may be a potential overshift that
8312 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8313 // single one).
8314
8315 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8316 if (LHSShift)
8317 if (SDValue NewRHSShift =
8318 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8319 RHSShift = NewRHSShift;
8320 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8321 if (RHSShift)
8322 if (SDValue NewLHSShift =
8323 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8324 LHSShift = NewLHSShift;
8325
8326 // If a side is still missing, nothing else we can do.
8327 if (!RHSShift || !LHSShift)
8328 return SDValue();
8329
8330 // At this point we've matched or extracted a shift op on each side.
8331
8332 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8333 return SDValue(); // Shifts must disagree.
8334
8335 // Canonicalize shl to left side in a shl/srl pair.
8336 if (RHSShift.getOpcode() == ISD::SHL) {
8337 std::swap(LHS, RHS);
8338 std::swap(LHSShift, RHSShift);
8339 std::swap(LHSMask, RHSMask);
8340 }
8341
8342 // Something has gone wrong - we've lost the shl/srl pair - bail.
8343 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8344 return SDValue();
8345
8346 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8347 SDValue LHSShiftArg = LHSShift.getOperand(0);
8348 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8349 SDValue RHSShiftArg = RHSShift.getOperand(0);
8350 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8351
8352 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8353 ConstantSDNode *RHS) {
8354 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8355 };
8356
8357 auto ApplyMasks = [&](SDValue Res) {
8358 // If there is an AND of either shifted operand, apply it to the result.
8359 if (LHSMask.getNode() || RHSMask.getNode()) {
8360 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8361 SDValue Mask = AllOnes;
8362
8363 if (LHSMask.getNode()) {
8364 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8365 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8366 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8367 }
8368 if (RHSMask.getNode()) {
8369 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8370 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8371 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8372 }
8373
8374 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8375 }
8376
8377 return Res;
8378 };
8379
8380 // TODO: Support pre-legalization funnel-shift by constant.
8381 bool IsRotate = LHSShiftArg == RHSShiftArg;
8382 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8383 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8384 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8385 // Look for a disguised rotate by constant.
8386 // The common shifted operand X may be hidden inside another 'or'.
8387 SDValue X, Y;
8388 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8389 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8390 return false;
8391 if (CommonOp == Or.getOperand(0)) {
8392 X = CommonOp;
8393 Y = Or.getOperand(1);
8394 return true;
8395 }
8396 if (CommonOp == Or.getOperand(1)) {
8397 X = CommonOp;
8398 Y = Or.getOperand(0);
8399 return true;
8400 }
8401 return false;
8402 };
8403
8404 SDValue Res;
8405 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8406 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8407 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8408 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8409 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8410 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8411 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8412 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8413 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8414 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8415 } else {
8416 return SDValue();
8417 }
8418
8419 return ApplyMasks(Res);
8420 }
8421
8422 return SDValue(); // Requires funnel shift support.
8423 }
8424
8425 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8426 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8427 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8428 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8429 // iff C1+C2 == EltSizeInBits
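// For example, for i32 with C1 == 8 and C2 == 24 (8 + 24 == 32):
// (or (shl x, 8), (srl x, 24)) -> (rotl x, 8), which equals (rotr x, 24).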
8430 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8431 SDValue Res;
8432 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8433 bool UseROTL = !LegalOperations || HasROTL;
8434 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8435 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8436 } else {
8437 bool UseFSHL = !LegalOperations || HasFSHL;
8438 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8439 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8440 }
8441
8442 return ApplyMasks(Res);
8443 }
8444
8445 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8446 // shift.
8447 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8448 return SDValue();
8449
8450 // If there is a mask here, and we have a variable shift, we can't be sure
8451 // that we're masking out the right stuff.
8452 if (LHSMask.getNode() || RHSMask.getNode())
8453 return SDValue();
8454
8455 // If the shift amount is sign/zext/any-extended just peel it off.
8456 SDValue LExtOp0 = LHSShiftAmt;
8457 SDValue RExtOp0 = RHSShiftAmt;
8458 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8459 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8460 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8461 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8462 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8463 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8464 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8465 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8466 LExtOp0 = LHSShiftAmt.getOperand(0);
8467 RExtOp0 = RHSShiftAmt.getOperand(0);
8468 }
8469
8470 if (IsRotate && (HasROTL || HasROTR)) {
8471 SDValue TryL =
8472 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8473 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8474 if (TryL)
8475 return TryL;
8476
8477 SDValue TryR =
8478 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8479 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8480 if (TryR)
8481 return TryR;
8482 }
8483
8484 SDValue TryL =
8485 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8486 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8487 if (TryL)
8488 return TryL;
8489
8490 SDValue TryR =
8491 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8492 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8493 if (TryR)
8494 return TryR;
8495
8496 return SDValue();
8497}
8498
8499/// Recursively traverses the expression calculating the origin of the requested
8500/// byte of the given value. Returns std::nullopt if the provider can't be
8501/// calculated.
8502///
8503/// For all the values except the root of the expression, we verify that the
8504/// value has exactly one use and if not then return std::nullopt. This way if
8505/// the origin of the byte is returned it's guaranteed that the values which
8506/// contribute to the byte are not used outside of this expression.
8507
8508/// However, there is a special case when dealing with vector loads -- we allow
8509/// more than one use if the load is a vector type. Since the values that
8510/// contribute to the byte ultimately come from the ExtractVectorElements of the
8511/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8512/// because those operations are independent from the pattern to be combined.
8513/// For vector loads, we simply care that the ByteProviders are adjacent
8514/// positions of the same vector, and their index matches the byte that is being
8515/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8516/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8517/// byte position we are trying to provide for the LoadCombine. If these do
8518/// not match, then we can not combine the vector loads. \p Index uses the
8519/// byte position we are trying to provide for and is matched against the
8520/// shl and load size. The \p Index algorithm ensures the requested byte is
8521/// provided for by the pattern, and the pattern does not over provide bytes.
8522///
8523///
8524/// The supported LoadCombine pattern for vector loads is as follows
8525 ///                              or
8526 ///                            /   \
8527 ///                          or     shl
8528 ///                        /   \      |
8529 ///                      or    shl   zext
8530 ///                    /   \     |     |
8531 ///                  shl   zext zext  EVE*
8532 ///                   |     |    |     |
8533 ///                 zext   EVE* EVE*  LOAD
8534 ///                   |     |    |
8535 ///                 EVE*   LOAD LOAD
8536 ///                   |
8537 ///                 LOAD
8538///
8539/// *ExtractVectorElement
8541
8542static std::optional<SDByteProvider>
8543 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8544 std::optional<uint64_t> VectorIndex,
8545 unsigned StartingIndex = 0) {
8546
8547 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8548 if (Depth == 10)
8549 return std::nullopt;
8550
8551 // Only allow multiple uses if the instruction is a vector load (in which
8552 // case we will use the load for every ExtractVectorElement)
8553 if (Depth && !Op.hasOneUse() &&
8554 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8555 return std::nullopt;
8556
8557 // Fail to combine if we have encountered anything but a LOAD after handling
8558 // an ExtractVectorElement.
8559 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8560 return std::nullopt;
8561
8562 unsigned BitWidth = Op.getValueSizeInBits();
8563 if (BitWidth % 8 != 0)
8564 return std::nullopt;
8565 unsigned ByteWidth = BitWidth / 8;
8566 assert(Index < ByteWidth && "invalid index requested");
8567 (void) ByteWidth;
8568
8569 switch (Op.getOpcode()) {
8570 case ISD::OR: {
8571 auto LHS =
8572 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8573 if (!LHS)
8574 return std::nullopt;
8575 auto RHS =
8576 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8577 if (!RHS)
8578 return std::nullopt;
8579
8580 if (LHS->isConstantZero())
8581 return RHS;
8582 if (RHS->isConstantZero())
8583 return LHS;
8584 return std::nullopt;
8585 }
8586 case ISD::SHL: {
8587 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8588 if (!ShiftOp)
8589 return std::nullopt;
8590
8591 uint64_t BitShift = ShiftOp->getZExtValue();
8592
8593 if (BitShift % 8 != 0)
8594 return std::nullopt;
8595 uint64_t ByteShift = BitShift / 8;
8596
8597 // If we are shifting by an amount greater than the index we are trying to
8598 // provide, then do not provide anything. Otherwise, subtract the index by
8599 // the amount we shifted by.
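// For example, for Op == (shl x, 16): ByteShift == 2, so requesting Index 1
// yields a known-zero byte, while Index 3 is provided by byte 1 of x.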
8600 return Index < ByteShift
8601 ? SDByteProvider::getConstantZero()
8602 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8603 Depth + 1, VectorIndex, Index);
8604 }
8605 case ISD::ANY_EXTEND:
8606 case ISD::SIGN_EXTEND:
8607 case ISD::ZERO_EXTEND: {
8608 SDValue NarrowOp = Op->getOperand(0);
8609 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8610 if (NarrowBitWidth % 8 != 0)
8611 return std::nullopt;
8612 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8613
8614 if (Index >= NarrowByteWidth)
8615 return Op.getOpcode() == ISD::ZERO_EXTEND
8616 ? std::optional<SDByteProvider>(
8617 SDByteProvider::getConstantZero())
8618 : std::nullopt;
8619 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8620 StartingIndex);
8621 }
8622 case ISD::BSWAP:
8623 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8624 Depth + 1, VectorIndex, StartingIndex);
8625 case ISD::EXTRACT_VECTOR_ELT: {
8626 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8627 if (!OffsetOp)
8628 return std::nullopt;
8629
8630 VectorIndex = OffsetOp->getZExtValue();
8631
8632 SDValue NarrowOp = Op->getOperand(0);
8633 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8634 if (NarrowBitWidth % 8 != 0)
8635 return std::nullopt;
8636 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8637 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8638 // type, leaving the high bits undefined.
8639 if (Index >= NarrowByteWidth)
8640 return std::nullopt;
8641
8642 // Check to see if the position of the element in the vector corresponds
8643 // with the byte we are trying to provide for. In the case of a vector of
8644 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8645 // the element will provide a range of bytes. For example, if we have a
8646 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8647 // 3).
8648 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8649 return std::nullopt;
8650 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8651 return std::nullopt;
8652
8653 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8654 VectorIndex, StartingIndex);
8655 }
8656 case ISD::LOAD: {
8657 auto L = cast<LoadSDNode>(Op.getNode());
8658 if (!L->isSimple() || L->isIndexed())
8659 return std::nullopt;
8660
8661 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8662 if (NarrowBitWidth % 8 != 0)
8663 return std::nullopt;
8664 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8665
8666 // If the width of the load does not reach the byte we are trying to provide for
8667 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8668 // question
8669 if (Index >= NarrowByteWidth)
8670 return L->getExtensionType() == ISD::ZEXTLOAD
8671 ? std::optional<SDByteProvider>(
8672 SDByteProvider::getConstantZero())
8673 : std::nullopt;
8674
8675 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8676 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8677 }
8678 }
8679
8680 return std::nullopt;
8681}
8682
8683static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8684 return i;
8685}
8686
8687static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8688 return BW - i - 1;
8689}
8690
8691 // Check if the byte offsets we are looking at match with either big or
8692// little endian value loaded. Return true for big endian, false for little
8693// endian, and std::nullopt if match failed.
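// For example, with FirstOffset == 0: offsets {0, 1, 2, 3} match the
// little-endian layout (returns false), while {3, 2, 1, 0} match the
// big-endian layout (returns true).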
8694static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8695 int64_t FirstOffset) {
8696 // The endian can be decided only when it is 2 bytes at least.
8697 unsigned Width = ByteOffsets.size();
8698 if (Width < 2)
8699 return std::nullopt;
8700
8701 bool BigEndian = true, LittleEndian = true;
8702 for (unsigned i = 0; i < Width; i++) {
8703 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8704 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8705 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8706 if (!BigEndian && !LittleEndian)
8707 return std::nullopt;
8708 }
8709
8710 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8711 "little endian");
8712 return BigEndian;
8713}
8714
8715 static SDValue stripTruncAndExt(SDValue Value) {
8716 switch (Value.getOpcode()) {
8717 case ISD::TRUNCATE:
8718 case ISD::ZERO_EXTEND:
8719 case ISD::SIGN_EXTEND:
8720 case ISD::ANY_EXTEND:
8721 return stripTruncAndExt(Value.getOperand(0));
8722 }
8723 return Value;
8724}
8725
8726/// Match a pattern where a wide type scalar value is stored by several narrow
8727 /// stores. Fold it into a single store or a BSWAP and a store if the target
8728/// supports it.
8729///
8730/// Assuming little endian target:
8731/// i8 *p = ...
8732/// i32 val = ...
8733/// p[0] = (val >> 0) & 0xFF;
8734/// p[1] = (val >> 8) & 0xFF;
8735/// p[2] = (val >> 16) & 0xFF;
8736/// p[3] = (val >> 24) & 0xFF;
8737/// =>
8738/// *((i32)p) = val;
8739///
8740/// i8 *p = ...
8741/// i32 val = ...
8742/// p[0] = (val >> 24) & 0xFF;
8743/// p[1] = (val >> 16) & 0xFF;
8744/// p[2] = (val >> 8) & 0xFF;
8745/// p[3] = (val >> 0) & 0xFF;
8746/// =>
8747/// *((i32)p) = BSWAP(val);
8748SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8749 // The matching looks for "store (trunc x)" patterns that appear early but are
8750 // likely to be replaced by truncating store nodes during combining.
8751 // TODO: If there is evidence that running this later would help, this
8752 // limitation could be removed. Legality checks may need to be added
8753 // for the created store and optional bswap/rotate.
8754 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8755 return SDValue();
8756
8757 // We only handle merging simple stores of 1-4 bytes.
8758 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8759 EVT MemVT = N->getMemoryVT();
8760 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8761 !N->isSimple() || N->isIndexed())
8762 return SDValue();
8763
8764 // Collect all of the stores in the chain, up to the maximum store width (i64).
8765 SDValue Chain = N->getChain();
8766 SmallVector<StoreSDNode *, 8> Stores;
8767 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8768 unsigned MaxWideNumBits = 64;
8769 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8770 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8771 // All stores must be the same size to ensure that we are writing all of the
8772 // bytes in the wide value.
8773 // This store should have exactly one use as a chain operand for another
8774 // store in the merging set. If there are other chain uses, then the
8775 // transform may not be safe because order of loads/stores outside of this
8776 // set may not be preserved.
8777 // TODO: We could allow multiple sizes by tracking each stored byte.
8778 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8779 Store->isIndexed() || !Store->hasOneUse())
8780 return SDValue();
8781 Stores.push_back(Store);
8782 Chain = Store->getChain();
8783 if (MaxStores < Stores.size())
8784 return SDValue();
8785 }
8786 // There is no reason to continue if we do not have at least a pair of stores.
8787 if (Stores.size() < 2)
8788 return SDValue();
8789
8790 // Handle simple types only.
8791 LLVMContext &Context = *DAG.getContext();
8792 unsigned NumStores = Stores.size();
8793 unsigned WideNumBits = NumStores * NarrowNumBits;
8794 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8795 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8796 return SDValue();
8797
8798 // Check if all bytes of the source value that we are looking at are stored
8799 // to the same base address. Collect offsets from Base address into OffsetMap.
8800 SDValue SourceValue;
8801 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8802 int64_t FirstOffset = INT64_MAX;
8803 StoreSDNode *FirstStore = nullptr;
8804 std::optional<BaseIndexOffset> Base;
8805 for (auto *Store : Stores) {
8806 // All the stores store different parts of the CombinedValue. A truncate is
8807 // required to get the partial value.
8808 SDValue Trunc = Store->getValue();
8809 if (Trunc.getOpcode() != ISD::TRUNCATE)
8810 return SDValue();
8811 // Other than the first/last part, a shift operation is required to get the
8812 // offset.
8813 int64_t Offset = 0;
8814 SDValue WideVal = Trunc.getOperand(0);
8815 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8816 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8817 // The shift amount must be a constant multiple of the narrow type.
8818 // It is translated to the offset address in the wide source value "y".
8819 //
8820 // x = srl y, ShiftAmtC
8821 // i8 z = trunc x
8822 // store z, ...
8823 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8824 if (ShiftAmtC % NarrowNumBits != 0)
8825 return SDValue();
8826
8827 Offset = ShiftAmtC / NarrowNumBits;
8828 WideVal = WideVal.getOperand(0);
8829 }
8830
8831 // Stores must share the same source value with different offsets.
8832 // Truncate and extends should be stripped to get the single source value.
8833 if (!SourceValue)
8834 SourceValue = WideVal;
8835 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8836 return SDValue();
8837 else if (SourceValue.getValueType() != WideVT) {
8838 if (WideVal.getValueType() == WideVT ||
8839 WideVal.getScalarValueSizeInBits() >
8840 SourceValue.getScalarValueSizeInBits())
8841 SourceValue = WideVal;
8842 // Give up if the source value type is smaller than the store size.
8843 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8844 return SDValue();
8845 }
8846
8847 // Stores must share the same base address.
8848 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8849 int64_t ByteOffsetFromBase = 0;
8850 if (!Base)
8851 Base = Ptr;
8852 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8853 return SDValue();
8854
8855 // Remember the first store.
8856 if (ByteOffsetFromBase < FirstOffset) {
8857 FirstStore = Store;
8858 FirstOffset = ByteOffsetFromBase;
8859 }
8860 // Map the offset in the store and the offset in the combined value, and
8861 // early return if it has been set before.
8862 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8863 return SDValue();
8864 OffsetMap[Offset] = ByteOffsetFromBase;
8865 }
8866
8867 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8868 assert(FirstStore && "First store must be set");
8869
8870 // Check that a store of the wide type is both allowed and fast on the target
8871 const DataLayout &Layout = DAG.getDataLayout();
8872 unsigned Fast = 0;
8873 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8874 *FirstStore->getMemOperand(), &Fast);
8875 if (!Allowed || !Fast)
8876 return SDValue();
8877
8878 // Check if the pieces of the value are going to the expected places in memory
8879 // to merge the stores.
8880 auto checkOffsets = [&](bool MatchLittleEndian) {
8881 if (MatchLittleEndian) {
8882 for (unsigned i = 0; i != NumStores; ++i)
8883 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
8884 return false;
8885 } else { // MatchBigEndian by reversing loop counter.
8886 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
8887 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
8888 return false;
8889 }
8890 return true;
8891 };
8892
8893 // Check if the offsets line up for the native data layout of this target.
8894 bool NeedBswap = false;
8895 bool NeedRotate = false;
8896 if (!checkOffsets(Layout.isLittleEndian())) {
8897 // Special-case: check if byte offsets line up for the opposite endian.
8898 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
8899 NeedBswap = true;
8900 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
8901 NeedRotate = true;
8902 else
8903 return SDValue();
8904 }
8905
8906 SDLoc DL(N);
8907 if (WideVT != SourceValue.getValueType()) {
8908 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8909 "Unexpected store value to merge");
8910 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8911 }
8912
8913 // Before legalize we can introduce illegal bswaps/rotates which will be later
8914 // converted to an explicit bswap sequence. This way we end up with a single
8915 // store and byte shuffling instead of several stores and byte shuffling.
8916 if (NeedBswap) {
8917 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8918 } else if (NeedRotate) {
8919 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8920 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8921 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8922 }
8923
8924 SDValue NewStore =
8925 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8926 FirstStore->getPointerInfo(), FirstStore->getAlign());
8927
8928 // Rely on other DAG combine rules to remove the other individual stores.
8929 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8930 return NewStore;
8931}
8932
8933/// Match a pattern where a wide type scalar value is loaded by several narrow
8934/// loads and combined by shifts and ors. Fold it into a single load or a load
8935 /// and a BSWAP if the target supports it.
8936///
8937/// Assuming little endian target:
8938/// i8 *a = ...
8939/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8940/// =>
8941/// i32 val = *((i32)a)
8942///
8943/// i8 *a = ...
8944/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8945/// =>
8946/// i32 val = BSWAP(*((i32)a))
8947///
8948/// TODO: This rule matches complex patterns with OR node roots and doesn't
8949/// interact well with the worklist mechanism. When a part of the pattern is
8950/// updated (e.g. one of the loads) its direct users are put into the worklist,
8951/// but the root node of the pattern which triggers the load combine is not
8952/// necessarily a direct user of the changed node. For example, once the address
8953 /// of the t28 load is reassociated, load combine won't be triggered:
8954/// t25: i32 = add t4, Constant:i32<2>
8955/// t26: i64 = sign_extend t25
8956/// t27: i64 = add t2, t26
8957/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8958/// t29: i32 = zero_extend t28
8959/// t32: i32 = shl t29, Constant:i8<8>
8960/// t33: i32 = or t23, t32
8961/// As a possible fix visitLoad can check if the load can be a part of a load
8962/// combine pattern and add corresponding OR roots to the worklist.
8963SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8964 assert(N->getOpcode() == ISD::OR &&
8965 "Can only match load combining against OR nodes");
8966
8967 // Handles simple types only
8968 EVT VT = N->getValueType(0);
8969 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8970 return SDValue();
8971 unsigned ByteWidth = VT.getSizeInBits() / 8;
8972
8973 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
8974 auto MemoryByteOffset = [&](SDByteProvider P) {
8975 assert(P.hasSrc() && "Must be a memory byte provider");
8976 auto *Load = cast<LoadSDNode>(P.Src.value());
8977
8978 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
8979
8980 assert(LoadBitWidth % 8 == 0 &&
8981 "can only analyze providers for individual bytes not bit");
8982 unsigned LoadByteWidth = LoadBitWidth / 8;
8983 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
8984 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
8985 };
8986
8987 std::optional<BaseIndexOffset> Base;
8988 SDValue Chain;
8989
8990 SmallPtrSet<LoadSDNode *, 8> Loads;
8991 std::optional<SDByteProvider> FirstByteProvider;
8992 int64_t FirstOffset = INT64_MAX;
8993
8994 // Check if all the bytes of the OR we are looking at are loaded from the same
8995 // base address. Collect bytes offsets from Base address in ByteOffsets.
8996 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8997 unsigned ZeroExtendedBytes = 0;
8998 for (int i = ByteWidth - 1; i >= 0; --i) {
8999 auto P =
9000 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9001 /*StartingIndex*/ i);
9002 if (!P)
9003 return SDValue();
9004
9005 if (P->isConstantZero()) {
9006 // It's OK for the N most significant bytes to be 0, we can just
9007 // zero-extend the load.
9008 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9009 return SDValue();
9010 continue;
9011 }
9012 assert(P->hasSrc() && "provenance should either be memory or zero");
9013 auto *L = cast<LoadSDNode>(P->Src.value());
9014
9015 // All loads must share the same chain
9016 SDValue LChain = L->getChain();
9017 if (!Chain)
9018 Chain = LChain;
9019 else if (Chain != LChain)
9020 return SDValue();
9021
9022 // Loads must share the same base address
9023 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9024 int64_t ByteOffsetFromBase = 0;
9025
9026 // For vector loads, the expected load combine pattern will have an
9027 // ExtractElement for each index in the vector. While each of these
9028 // ExtractElements will be accessing the same base address as determined
9029 // by the load instruction, the actual bytes they interact with will differ
9030 // due to different ExtractElement indices. To accurately determine the
9031 // byte position of an ExtractElement, we offset the base load ptr with
9032 // the index multiplied by the byte size of each element in the vector.
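// For example, for a load of <4 x i16>, an ExtractVectorElement at index 2
// accesses bytes starting at offset 2 * 2 == 4 from the load's base pointer.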
9033 if (L->getMemoryVT().isVector()) {
9034 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9035 if (LoadWidthInBit % 8 != 0)
9036 return SDValue();
9037 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9038 Ptr.addToOffset(ByteOffsetFromVector);
9039 }
9040
9041 if (!Base)
9042 Base = Ptr;
9043
9044 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9045 return SDValue();
9046
9047 // Calculate the offset of the current byte from the base address
9048 ByteOffsetFromBase += MemoryByteOffset(*P);
9049 ByteOffsets[i] = ByteOffsetFromBase;
9050
9051 // Remember the first byte load
9052 if (ByteOffsetFromBase < FirstOffset) {
9053 FirstByteProvider = P;
9054 FirstOffset = ByteOffsetFromBase;
9055 }
9056
9057 Loads.insert(L);
9058 }
9059
9060 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9061 "memory, so there must be at least one load which produces the value");
9062 assert(Base && "Base address of the accessed memory location must be set");
9063 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9064
9065 bool NeedsZext = ZeroExtendedBytes > 0;
9066
9067 EVT MemVT =
9068 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9069
9070 if (!MemVT.isSimple())
9071 return SDValue();
9072
9073 // Before legalize we can introduce too wide illegal loads which will be later
9074 // split into legal sized loads. This enables us to combine i64 load by i8
9075 // patterns to a couple of i32 loads on 32 bit targets.
9076 if (LegalOperations &&
9077 !TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9078 MemVT))
9079 return SDValue();
9080
9081 // Check if the bytes of the OR we are looking at match with either big or
9082 // little endian value load
9083 std::optional<bool> IsBigEndian = isBigEndian(
9084 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9085 if (!IsBigEndian)
9086 return SDValue();
9087
9088 assert(FirstByteProvider && "must be set");
9089
9090 // Ensure that the first byte is loaded from zero offset of the first load.
9091 // So the combined value can be loaded from the first load address.
9092 if (MemoryByteOffset(*FirstByteProvider) != 0)
9093 return SDValue();
9094 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9095
9096 // The node we are looking at matches with the pattern, check if we can
9097 // replace it with a single (possibly zero-extended) load and bswap + shift if
9098 // needed.
9099
9100 // If the load needs byte swap check if the target supports it
9101 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9102
9103 // Before legalize we can introduce illegal bswaps which will be later
9104 // converted to an explicit bswap sequence. This way we end up with a single
9105 // load and byte shuffling instead of several loads and byte shuffling.
9106 // We do not introduce illegal bswaps when zero-extending as this tends to
9107 // introduce too many arithmetic instructions.
9108 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9109 !TLI.isOperationLegal(ISD::BSWAP, VT))
9110 return SDValue();
9111
9112 // If we need to bswap and zero extend, we have to insert a shift. Check that
9113 // it is legal.
9114 if (NeedsBswap && NeedsZext && LegalOperations &&
9115 !TLI.isOperationLegal(ISD::SHL, VT))
9116 return SDValue();
9117
9118 // Check that a load of the wide type is both allowed and fast on the target
9119 unsigned Fast = 0;
9120 bool Allowed =
9121 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9122 *FirstLoad->getMemOperand(), &Fast);
9123 if (!Allowed || !Fast)
9124 return SDValue();
9125
9126 SDValue NewLoad =
9127 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9128 Chain, FirstLoad->getBasePtr(),
9129 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9130
9131 // Transfer chain users from old loads to the new load.
9132 for (LoadSDNode *L : Loads)
9133 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9134
9135 if (!NeedsBswap)
9136 return NewLoad;
9137
9138 SDValue ShiftedLoad =
9139 NeedsZext
9140 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9141 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9142 SDLoc(N), LegalOperations))
9143 : NewLoad;
9144 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9145}
9146
9147// If the target has andn, bsl, or a similar bit-select instruction,
9148// we want to unfold masked merge, with canonical pattern of:
9149// | A | |B|
9150// ((x ^ y) & m) ^ y
9151// | D |
9152// Into:
9153// (x & m) | (y & ~m)
9154// If y is a constant, m is not a 'not', and the 'andn' does not work with
9155// immediates, we unfold into a different pattern:
9156// ~(~x & m) & (m | y)
9157// If x is a constant, m is a 'not', and the 'andn' does not work with
9158// immediates, we unfold into a different pattern:
9159// (x | ~m) & ~(~m & ~y)
9160// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9161// the very least that breaks andnpd / andnps patterns, and because those
9162// patterns are simplified in IR and shouldn't be created in the DAG
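// Bit-level example of the main unfold (illustrative constants): with
// x == 0xAA, y == 0x55, m == 0xF0,
// ((x ^ y) & m) ^ y == (0xFF & 0xF0) ^ 0x55 == 0xA5, and
// (x & m) | (y & ~m) == 0xA0 | 0x05 == 0xA5: each result bit is taken from x
// where m is set and from y where m is clear.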
9163SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9164 assert(N->getOpcode() == ISD::XOR);
9165
9166 // Don't touch 'not' (i.e. where y = -1).
9167 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9168 return SDValue();
9169
9170 EVT VT = N->getValueType(0);
9171
9172 // There are 3 commutable operators in the pattern,
9173 // so we have to deal with 8 possible variants of the basic pattern.
9174 SDValue X, Y, M;
9175 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9176 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9177 return false;
9178 SDValue Xor = And.getOperand(XorIdx);
9179 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9180 return false;
9181 SDValue Xor0 = Xor.getOperand(0);
9182 SDValue Xor1 = Xor.getOperand(1);
9183 // Don't touch 'not' (i.e. where y = -1).
9184 if (isAllOnesOrAllOnesSplat(Xor1))
9185 return false;
9186 if (Other == Xor0)
9187 std::swap(Xor0, Xor1);
9188 if (Other != Xor1)
9189 return false;
9190 X = Xor0;
9191 Y = Xor1;
9192 M = And.getOperand(XorIdx ? 0 : 1);
9193 return true;
9194 };
9195
9196 SDValue N0 = N->getOperand(0);
9197 SDValue N1 = N->getOperand(1);
9198 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9199 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9200 return SDValue();
9201
9202 // Don't do anything if the mask is constant. This should not be reachable.
9203 // InstCombine should have already unfolded this pattern, and DAGCombiner
9204 // probably shouldn't produce it, too.
9205 if (isa<ConstantSDNode>(M.getNode()))
9206 return SDValue();
9207
9208 // We can transform if the target has AndNot
9209 if (!TLI.hasAndNot(M))
9210 return SDValue();
9211
9212 SDLoc DL(N);
9213
9214 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9215 // a bitwise not that would already allow ANDN to be used.
9216 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9217 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9218 // If not, we need to do a bit more work to make sure andn is still used.
9219 SDValue NotX = DAG.getNOT(DL, X, VT);
9220 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9221 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9222 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9223 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9224 }
9225
9226 // If X is a constant and M is a bitwise not, check that 'andn' works with
9227 // immediates.
9228 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9229 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9230 // If not, we need to do a bit more work to make sure andn is still used.
9231 SDValue NotM = M.getOperand(0);
9232 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9233 SDValue NotY = DAG.getNOT(DL, Y, VT);
9234 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9235 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9236 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9237 }
9238
9239 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9240 SDValue NotM = DAG.getNOT(DL, M, VT);
9241 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9242
9243 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9244}
9245
9246SDValue DAGCombiner::visitXOR(SDNode *N) {
9247 SDValue N0 = N->getOperand(0);
9248 SDValue N1 = N->getOperand(1);
9249 EVT VT = N0.getValueType();
9250 SDLoc DL(N);
9251
9252 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9253 if (N0.isUndef() && N1.isUndef())
9254 return DAG.getConstant(0, DL, VT);
9255
9256 // fold (xor x, undef) -> undef
9257 if (N0.isUndef())
9258 return N0;
9259 if (N1.isUndef())
9260 return N1;
9261
9262 // fold (xor c1, c2) -> c1^c2
9263 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9264 return C;
9265
9266 // canonicalize constant to RHS
9267 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9268 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9269 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9270
9271 // fold vector ops
9272 if (VT.isVector()) {
9273 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9274 return FoldedVOp;
9275
9276 // fold (xor x, 0) -> x, vector edition
9277 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9278 return N0;
9279 }
9280
9281 // fold (xor x, 0) -> x
9282 if (isNullConstant(N1))
9283 return N0;
9284
9285 if (SDValue NewSel = foldBinOpIntoSelect(N))
9286 return NewSel;
9287
9288 // reassociate xor
9289 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9290 return RXOR;
9291
9292 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9293 if (SDValue SD =
9294 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9295 return SD;
9296
9297 // fold (a^b) -> (a|b) iff a and b share no bits.
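// For example, 0b0011 ^ 0b1100 == 0b1111 == 0b0011 | 0b1100; the OR is built
// with the 'disjoint' flag to record that no bits overlap.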
9298 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9299 DAG.haveNoCommonBitsSet(N0, N1)) {
9300 SDNodeFlags Flags;
9301 Flags.setDisjoint(true);
9302 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9303 }
9304
9305 // look for 'add-like' folds:
9306 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
9307 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9308 isMinSignedConstant(N1))
9309 if (SDValue Combined = visitADDLike(N))
9310 return Combined;
9311
9312 // fold !(x cc y) -> (x !cc y)
9313 unsigned N0Opcode = N0.getOpcode();
9314 SDValue LHS, RHS, CC;
9315 if (TLI.isConstTrueVal(N1) &&
9316 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9317 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9318 LHS.getValueType());
9319 if (!LegalOperations ||
9320 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9321 switch (N0Opcode) {
9322 default:
9323 llvm_unreachable("Unhandled SetCC Equivalent!");
9324 case ISD::SETCC:
9325 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9326 case ISD::SELECT_CC:
9327 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9328 N0.getOperand(3), NotCC);
9329 case ISD::STRICT_FSETCC:
9330 case ISD::STRICT_FSETCCS: {
9331 if (N0.hasOneUse()) {
9332 // FIXME Can we handle multiple uses? Could we token factor the chain
9333 // results from the new/old setcc?
9334 SDValue SetCC =
9335 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9336 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9337 CombineTo(N, SetCC);
9338 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9339 recursivelyDeleteUnusedNodes(N0.getNode());
9340 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9341 }
9342 break;
9343 }
9344 }
9345 }
9346 }
9347
9348 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9349 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9350 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9351 SDValue V = N0.getOperand(0);
9352 SDLoc DL0(N0);
9353 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9354 DAG.getConstant(1, DL0, V.getValueType()));
9355 AddToWorklist(V.getNode());
9356 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9357 }
9358
9359 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9360 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9361 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9362 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9363 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9364 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9365 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9366 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9367 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9368 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9369 }
9370 }
9371 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9372 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9373 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9374 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9375 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9376 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9377 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9378 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9379 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9380 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9381 }
9382 }
9383
9384 // fold (not (neg x)) -> (add X, -1)
9385 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9386 // Y is a constant or the subtract has a single use.
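// (Illustrative note: in two's complement, not(0 - x) == x - 1, i.e.
// add x, -1; the general case follows from not(a) == -a - 1.)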
9387 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9388 isNullConstant(N0.getOperand(0))) {
9389 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9390 DAG.getAllOnesConstant(DL, VT));
9391 }
9392
9393 // fold (not (add X, -1)) -> (neg X)
9394 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9395 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9396 return DAG.getNegative(N0.getOperand(0), DL, VT);
9397 }
9398
9399 // fold (xor (and x, y), y) -> (and (not x), y)
9400 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9401 SDValue X = N0.getOperand(0);
9402 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9403 AddToWorklist(NotX.getNode());
9404 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9405 }
9406
9407 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
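// (Illustrative sketch: Y is 0 for non-negative X and -1 otherwise, so
//  (add X, Y) ^ Y is X when Y == 0, and ((X - 1) ^ -1) == -X when Y == -1,
//  i.e. exactly abs(X).)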
9408 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9409 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9410 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9411 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9412 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9413 SDValue S0 = S.getOperand(0);
9414 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9415 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9416 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9417 return DAG.getNode(ISD::ABS, DL, VT, S0);
9418 }
9419 }
9420
9421 // fold (xor x, x) -> 0
9422 if (N0 == N1)
9423 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9424
9425 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9426 // Here is a concrete example of this equivalence:
9427 // i16 x == 14
9428 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9429 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9430 //
9431 // =>
9432 //
9433 // i16 ~1 == 0b1111111111111110
9434 // i16 rol(~1, 14) == 0b1011111111111111
9435 //
9436 // Some additional tips to help conceptualize this transform:
9437 // - Try to see the operation as placing a single zero in a value of all ones.
9438 // - There exists no value for x which would allow the result to contain zero.
9439 // - Values of x larger than the bitwidth are undefined and do not require a
9440 // consistent result.
9441 // - Pushing the zero left requires shifting one-bits in from the right.
9442 // A rotate left of ~1 is a nice way of achieving the desired result.
9443 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9444 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9445 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9446 N0.getOperand(1));
9447 }
9448
9449 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9450 if (N0Opcode == N1.getOpcode())
9451 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9452 return V;
9453
9454 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9455 return R;
9456 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9457 return R;
9458 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9459 return R;
9460
9461 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
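// A hedged illustration: with m = 0x0F, x = 0xAB, y = 0xCD (i8), both forms
// select the low nibble from x and the high nibble from y, yielding 0xCB.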
9462 if (SDValue MM = unfoldMaskedMerge(N))
9463 return MM;
9464
9465 // Simplify the expression using non-local knowledge.
9466 if (SimplifyDemandedBits(SDValue(N, 0)))
9467 return SDValue(N, 0);
9468
9469 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9470 return Combined;
9471
9472 return SDValue();
9473}
9474
9475/// If we have a shift-by-constant of a bitwise logic op that itself has a
9476/// shift-by-constant operand with identical opcode, we may be able to convert
9477/// that into 2 independent shifts followed by the logic op. This is a
9478/// throughput improvement.
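/// For example (illustrative only): shl (and (shl X, 2), Y), 3 becomes
/// and (shl X, 5), (shl Y, 3), where the two new shifts are independent
/// and can execute in parallel.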
9479 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9480 // Match a one-use bitwise logic op.
9481 SDValue LogicOp = Shift->getOperand(0);
9482 if (!LogicOp.hasOneUse())
9483 return SDValue();
9484
9485 unsigned LogicOpcode = LogicOp.getOpcode();
9486 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9487 LogicOpcode != ISD::XOR)
9488 return SDValue();
9489
9490 // Find a matching one-use shift by constant.
9491 unsigned ShiftOpcode = Shift->getOpcode();
9492 SDValue C1 = Shift->getOperand(1);
9493 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9494 assert(C1Node && "Expected a shift with constant operand");
9495 const APInt &C1Val = C1Node->getAPIntValue();
9496 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9497 const APInt *&ShiftAmtVal) {
9498 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9499 return false;
9500
9501 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9502 if (!ShiftCNode)
9503 return false;
9504
9505 // Capture the shifted operand and shift amount value.
9506 ShiftOp = V.getOperand(0);
9507 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9508
9509 // Shift amount types do not have to match their operand type, so check that
9510 // the constants are the same width.
9511 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9512 return false;
9513
9514 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9515 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
9516 return false;
9517
9518 return true;
9519 };
9520
9521 // Logic ops are commutative, so check each operand for a match.
9522 SDValue X, Y;
9523 const APInt *C0Val;
9524 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9525 Y = LogicOp.getOperand(1);
9526 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9527 Y = LogicOp.getOperand(0);
9528 else
9529 return SDValue();
9530
9531 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9532 SDLoc DL(Shift);
9533 EVT VT = Shift->getValueType(0);
9534 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9535 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9536 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9537 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9538 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9539 LogicOp->getFlags());
9540}
9541
9542/// Handle transforms common to the three shifts, when the shift amount is a
9543/// constant.
9544/// We are looking for: (shift being one of shl/sra/srl)
9545/// shift (binop X, C0), C1
9546/// And want to transform into:
9547/// binop (shift X, C1), (shift C0, C1)
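/// e.g. (illustrative): shl (add X, 5), 2 --> add (shl X, 2), 20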
9548SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9549 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9550
9551 // Do not turn a 'not' into a regular xor.
9552 if (isBitwiseNot(N->getOperand(0)))
9553 return SDValue();
9554
9555 // The inner binop must be one-use, since we want to replace it.
9556 SDValue LHS = N->getOperand(0);
9557 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9558 return SDValue();
9559
9560 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9561 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9562 return R;
9563
9564 // We want to pull some binops through shifts, so that we have (and (shift))
9565 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9566 // thing happens with address calculations, so it's important to canonicalize
9567 // it.
9568 switch (LHS.getOpcode()) {
9569 default:
9570 return SDValue();
9571 case ISD::OR:
9572 case ISD::XOR:
9573 case ISD::AND:
9574 break;
9575 case ISD::ADD:
9576 if (N->getOpcode() != ISD::SHL)
9577 return SDValue(); // only shl(add) not sr[al](add).
9578 break;
9579 }
9580
9581 // FIXME: disable this unless the input to the binop is a shift by a constant
9582 // or is copy/select. Enable this in other cases when we figure out it's exactly
9583 // profitable.
9584 SDValue BinOpLHSVal = LHS.getOperand(0);
9585 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9586 BinOpLHSVal.getOpcode() == ISD::SRA ||
9587 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9588 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9589 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9590 BinOpLHSVal.getOpcode() == ISD::SELECT;
9591
9592 if (!IsShiftByConstant && !IsCopyOrSelect)
9593 return SDValue();
9594
9595 if (IsCopyOrSelect && N->hasOneUse())
9596 return SDValue();
9597
9598 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9599 SDLoc DL(N);
9600 EVT VT = N->getValueType(0);
9601 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9602 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9603 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9604 N->getOperand(1));
9605 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9606 }
9607
9608 return SDValue();
9609}
9610
9611SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9612 assert(N->getOpcode() == ISD::TRUNCATE);
9613 assert(N->getOperand(0).getOpcode() == ISD::AND);
9614
9615 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9616 EVT TruncVT = N->getValueType(0);
9617 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9618 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9619 SDValue N01 = N->getOperand(0).getOperand(1);
9620 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9621 SDLoc DL(N);
9622 SDValue N00 = N->getOperand(0).getOperand(0);
9623 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9624 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9625 AddToWorklist(Trunc00.getNode());
9626 AddToWorklist(Trunc01.getNode());
9627 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9628 }
9629 }
9630
9631 return SDValue();
9632}
9633
9634SDValue DAGCombiner::visitRotate(SDNode *N) {
9635 SDLoc dl(N);
9636 SDValue N0 = N->getOperand(0);
9637 SDValue N1 = N->getOperand(1);
9638 EVT VT = N->getValueType(0);
9639 unsigned Bitsize = VT.getScalarSizeInBits();
9640
9641 // fold (rot x, 0) -> x
9642 if (isNullOrNullSplat(N1))
9643 return N0;
9644
9645 // fold (rot x, c) -> x iff (c % BitSize) == 0
9646 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9647 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9648 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9649 return N0;
9650 }
9651
9652 // fold (rot x, c) -> (rot x, c % BitSize)
9653 bool OutOfRange = false;
9654 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9655 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9656 return true;
9657 };
9658 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9659 EVT AmtVT = N1.getValueType();
9660 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9661 if (SDValue Amt =
9662 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9663 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9664 }
9665
9666 // rot i16 X, 8 --> bswap X
9667 auto *RotAmtC = isConstOrConstSplat(N1);
9668 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9669 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9670 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9671
9672 // Simplify the operands using demanded-bits information.
9673 if (SimplifyDemandedBits(SDValue(N, 0)))
9674 return SDValue(N, 0);
9675
9676 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9677 if (N1.getOpcode() == ISD::TRUNCATE &&
9678 N1.getOperand(0).getOpcode() == ISD::AND) {
9679 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9680 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9681 }
9682
9683 unsigned NextOp = N0.getOpcode();
9684
9685 // fold (rot* (rot* x, c2), c1)
9686 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
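// e.g. (illustrative, i8): rotl (rotr x, 3), 5 uses the '-' form:
// ((5 % 8) - (3 % 8) + 8) % 8 == 2, i.e. rotl x, 2.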
9687 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9688 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9689 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9690 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9691 EVT ShiftVT = C1->getValueType(0);
9692 bool SameSide = (N->getOpcode() == NextOp);
9693 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9694 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9695 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9696 {N1, BitsizeC});
9697 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9698 {N0.getOperand(1), BitsizeC});
9699 if (Norm1 && Norm2)
9700 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9701 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9702 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9703 {CombinedShift, BitsizeC});
9704 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9705 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9706 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9707 CombinedShiftNorm);
9708 }
9709 }
9710 }
9711 return SDValue();
9712}
9713
9714SDValue DAGCombiner::visitSHL(SDNode *N) {
9715 SDValue N0 = N->getOperand(0);
9716 SDValue N1 = N->getOperand(1);
9717 if (SDValue V = DAG.simplifyShift(N0, N1))
9718 return V;
9719
9720 EVT VT = N0.getValueType();
9721 EVT ShiftVT = N1.getValueType();
9722 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9723
9724 // fold (shl c1, c2) -> c1<<c2
9725 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
9726 return C;
9727
9728 // fold vector ops
9729 if (VT.isVector()) {
9730 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9731 return FoldedVOp;
9732
9733 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9734 // If setcc produces all-one true value then:
9735 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9736 if (N1CV && N1CV->isConstant()) {
9737 if (N0.getOpcode() == ISD::AND) {
9738 SDValue N00 = N0->getOperand(0);
9739 SDValue N01 = N0->getOperand(1);
9740 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9741
9742 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9743 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9744 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9745 if (SDValue C =
9746 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
9747 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
9748 }
9749 }
9750 }
9751 }
9752
9753 if (SDValue NewSel = foldBinOpIntoSelect(N))
9754 return NewSel;
9755
9756 // if (shl x, c) is known to be zero, return 0
9757 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9758 return DAG.getConstant(0, SDLoc(N), VT);
9759
9760 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9761 if (N1.getOpcode() == ISD::TRUNCATE &&
9762 N1.getOperand(0).getOpcode() == ISD::AND) {
9763 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9764 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
9765 }
9766
9767 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9768 if (N0.getOpcode() == ISD::SHL) {
9769 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9770 ConstantSDNode *RHS) {
9771 APInt c1 = LHS->getAPIntValue();
9772 APInt c2 = RHS->getAPIntValue();
9773 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9774 return (c1 + c2).uge(OpSizeInBits);
9775 };
9776 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9777 return DAG.getConstant(0, SDLoc(N), VT);
9778
9779 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9780 ConstantSDNode *RHS) {
9781 APInt c1 = LHS->getAPIntValue();
9782 APInt c2 = RHS->getAPIntValue();
9783 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9784 return (c1 + c2).ult(OpSizeInBits);
9785 };
9786 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9787 SDLoc DL(N);
9788 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9789 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9790 }
9791 }
9792
9793 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9794 // For this to be valid, the second form must not preserve any of the bits
9795 // that are shifted out by the inner shift in the first form. This means
9796 // the outer shift size must be >= the number of bits added by the ext.
9797 // As a corollary, we don't care what kind of ext it is.
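// Illustrative instance: shl (zext i8 (shl x, 3) to i32), 27 -- the ext adds
// 24 bits, 27 >= 24, and 3 + 27 < 32, so this becomes
// shl (zext i8 x to i32), 30.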
9798 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9799 N0.getOpcode() == ISD::ANY_EXTEND ||
9800 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9801 N0.getOperand(0).getOpcode() == ISD::SHL) {
9802 SDValue N0Op0 = N0.getOperand(0);
9803 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9804 EVT InnerVT = N0Op0.getValueType();
9805 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9806
9807 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9808 ConstantSDNode *RHS) {
9809 APInt c1 = LHS->getAPIntValue();
9810 APInt c2 = RHS->getAPIntValue();
9811 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9812 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9813 (c1 + c2).uge(OpSizeInBits);
9814 };
9815 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9816 /*AllowUndefs*/ false,
9817 /*AllowTypeMismatch*/ true))
9818 return DAG.getConstant(0, SDLoc(N), VT);
9819
9820 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9821 ConstantSDNode *RHS) {
9822 APInt c1 = LHS->getAPIntValue();
9823 APInt c2 = RHS->getAPIntValue();
9824 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9825 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9826 (c1 + c2).ult(OpSizeInBits);
9827 };
9828 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9829 /*AllowUndefs*/ false,
9830 /*AllowTypeMismatch*/ true)) {
9831 SDLoc DL(N);
9832 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9833 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9834 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9835 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9836 }
9837 }
9838
9839 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9840 // Only fold this if the inner zext has no other uses to avoid increasing
9841 // the total number of instructions.
9842 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9843 N0.getOperand(0).getOpcode() == ISD::SRL) {
9844 SDValue N0Op0 = N0.getOperand(0);
9845 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9846
9847 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9848 APInt c1 = LHS->getAPIntValue();
9849 APInt c2 = RHS->getAPIntValue();
9850 zeroExtendToMatch(c1, c2);
9851 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9852 };
9853 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9854 /*AllowUndefs*/ false,
9855 /*AllowTypeMismatch*/ true)) {
9856 SDLoc DL(N);
9857 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9858 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9859 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9860 AddToWorklist(NewSHL.getNode());
9861 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9862 }
9863 }
9864
9865 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9866 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9867 ConstantSDNode *RHS) {
9868 const APInt &LHSC = LHS->getAPIntValue();
9869 const APInt &RHSC = RHS->getAPIntValue();
9870 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9871 LHSC.getZExtValue() <= RHSC.getZExtValue();
9872 };
9873
9874 SDLoc DL(N);
9875
9876 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
9877 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
9878 if (N0->getFlags().hasExact()) {
9879 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9880 /*AllowUndefs*/ false,
9881 /*AllowTypeMismatch*/ true)) {
9882 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9883 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9884 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9885 }
9886 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9887 /*AllowUndefs*/ false,
9888 /*AllowTypeMismatch*/ true)) {
9889 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9890 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9891 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
9892 }
9893 }
9894
9895 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
9896 // (and (srl x, (sub c1, c2)), MASK)
9897 // Only fold this if the inner shift has no other uses -- if it does,
9898 // folding this will increase the total number of instructions.
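// A worked (illustrative) i8 case: shl (srl x, 3), 5 keeps bits 3..5 of x at
// bits 5..7 of the result, which is exactly (and (shl x, 2), 0xE0).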
9899 if (N0.getOpcode() == ISD::SRL &&
9900 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
9901 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9902 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9903 /*AllowUndefs*/ false,
9904 /*AllowTypeMismatch*/ true)) {
9905 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9906 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9907 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9908 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9909 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9910 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9911 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9912 }
9913 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9914 /*AllowUndefs*/ false,
9915 /*AllowTypeMismatch*/ true)) {
9916 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9917 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9918 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9919 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9920 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9921 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9922 }
9923 }
9924 }
9925
9926 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9927 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9928 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9929 SDLoc DL(N);
9930 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9931 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9932 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9933 }
9934
9935 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9936 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
9937 // Variant of version done on multiply, except mul by a power of 2 is turned
9938 // into a shift.
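// e.g. (illustrative): shl (add x, 3), 4 --> add (shl x, 4), 48, and
// likewise shl (or x, 3), 4 --> or (shl x, 4), 48.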
9939 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9940 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
9941 SDValue N01 = N0.getOperand(1);
9942 if (SDValue Shl1 =
9943 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
9944 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9945 AddToWorklist(Shl0.getNode());
9946 SDNodeFlags Flags;
9947 // Preserve the disjoint flag for Or.
9948 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
9949 Flags.setDisjoint(true);
9950 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
9951 }
9952 }
9953
9954 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
9955 // TODO: Add zext/add_nuw variant with suitable test coverage
9956 // TODO: Should we limit this with isLegalAddImmediate?
9957 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
9958 N0.getOperand(0).getOpcode() == ISD::ADD &&
9959 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
9960 N0.getOperand(0)->hasOneUse() &&
9961 TLI.isDesirableToCommuteWithShift(N, Level)) {
9962 SDValue Add = N0.getOperand(0);
9963 SDLoc DL(N0);
9964 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
9965 {Add.getOperand(1)})) {
9966 if (SDValue ShlC =
9967 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
9968 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
9969 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
9970 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
9971 }
9972 }
9973 }
9974
9975 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9976 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9977 SDValue N01 = N0.getOperand(1);
9978 if (SDValue Shl =
9979 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
9980 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
9981 }
9982
9983 ConstantSDNode *N1C = isConstOrConstSplat(N1);
9984 if (N1C && !N1C->isOpaque())
9985 if (SDValue NewSHL = visitShiftByConstant(N))
9986 return NewSHL;
9987
9988 if (SimplifyDemandedBits(SDValue(N, 0)))
9989 return SDValue(N, 0);
9990
9991 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
9992 if (N0.getOpcode() == ISD::VSCALE && N1C) {
9993 const APInt &C0 = N0.getConstantOperandAPInt(0);
9994 const APInt &C1 = N1C->getAPIntValue();
9995 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
9996 }
9997
9998 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
9999 APInt ShlVal;
10000 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10001 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10002 const APInt &C0 = N0.getConstantOperandAPInt(0);
10003 if (ShlVal.ult(C0.getBitWidth())) {
10004 APInt NewStep = C0 << ShlVal;
10005 return DAG.getStepVector(SDLoc(N), VT, NewStep);
10006 }
10007 }
10008
10009 return SDValue();
10010}
10011
10012// Transform a right shift of a multiply into a multiply-high.
10013// Examples:
10014 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10015 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10016 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
10017 const TargetLowering &TLI) {
10018 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10019 "SRL or SRA node is required here!");
10020
10021 // Check the shift amount. Proceed with the transformation if the shift
10022 // amount is constant.
10023 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10024 if (!ShiftAmtSrc)
10025 return SDValue();
10026
10027 SDLoc DL(N);
10028
10029 // The operation feeding into the shift must be a multiply.
10030 SDValue ShiftOperand = N->getOperand(0);
10031 if (ShiftOperand.getOpcode() != ISD::MUL)
10032 return SDValue();
10033
10034 // Both operands must be equivalent extend nodes.
10035 SDValue LeftOp = ShiftOperand.getOperand(0);
10036 SDValue RightOp = ShiftOperand.getOperand(1);
10037
10038 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10039 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10040
10041 if (!IsSignExt && !IsZeroExt)
10042 return SDValue();
10043
10044 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10045 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10046
10047 // return true if U may use the lower bits of its operands
10048 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10049 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10050 return true;
10051 }
10052 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10053 if (!UShiftAmtSrc) {
10054 return true;
10055 }
10056 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10057 return UShiftAmt < NarrowVTSize;
10058 };
10059
10060 // If the lower part of the MUL is also used and MUL_LOHI is supported
10061 // do not introduce the MULH in favor of MUL_LOHI
10062 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10063 if (!ShiftOperand.hasOneUse() &&
10064 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10065 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10066 return SDValue();
10067 }
10068
10069 SDValue MulhRightOp;
10070 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10071 unsigned ActiveBits = IsSignExt
10072 ? Constant->getAPIntValue().getSignificantBits()
10073 : Constant->getAPIntValue().getActiveBits();
10074 if (ActiveBits > NarrowVTSize)
10075 return SDValue();
10076 MulhRightOp = DAG.getConstant(
10077 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10078 NarrowVT);
10079 } else {
10080 if (LeftOp.getOpcode() != RightOp.getOpcode())
10081 return SDValue();
10082 // Check that the two extend nodes are the same type.
10083 if (NarrowVT != RightOp.getOperand(0).getValueType())
10084 return SDValue();
10085 MulhRightOp = RightOp.getOperand(0);
10086 }
10087
10088 EVT WideVT = LeftOp.getValueType();
10089 // Proceed with the transformation if the wide types match.
10090 assert((WideVT == RightOp.getValueType()) &&
10091 "Cannot have a multiply node with two different operand types.");
10092
10093 // Proceed with the transformation if the wide type is twice as large
10094 // as the narrow type.
10095 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10096 return SDValue();
10097
10098 // Check the shift amount with the narrow type size.
10099 // Proceed with the transformation if the shift amount is the width
10100 // of the narrow type.
10101 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10102 if (ShiftAmt != NarrowVTSize)
10103 return SDValue();
10104
10105 // If the operation feeding into the MUL is a sign extend (sext),
10106 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10107 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10108
10109 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10110 // or if it is a vector type then we could transform to an acceptable type and
10111 // rely on legalization to split/combine the result.
10112 if (NarrowVT.isVector()) {
10113 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10114 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10115 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10116 return SDValue();
10117 } else {
10118 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10119 return SDValue();
10120 }
10121
10122 SDValue Result =
10123 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10124 bool IsSigned = N->getOpcode() == ISD::SRA;
10125 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10126}
10127
10128// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
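// A hedged illustration (i32): bswap (and (bswap x), 0x000000FF) becomes
// and (x, 0xFF000000) -- the constant is byte-swapped instead of x.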
10129 // This helper function accepts an SDNode with opcode ISD::BSWAP or ISD::BITREVERSE.
10130 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10131 unsigned Opcode = N->getOpcode();
10132 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10133 return SDValue();
10134
10135 SDValue N0 = N->getOperand(0);
10136 EVT VT = N->getValueType(0);
10137 SDLoc DL(N);
10138 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10139 SDValue OldLHS = N0.getOperand(0);
10140 SDValue OldRHS = N0.getOperand(1);
10141
10142 // If both operands are bswap/bitreverse, ignore the multiuse
10143 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10144 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10145 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10146 OldRHS.getOperand(0));
10147 }
10148
10149 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10150 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10151 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10152 NewBitReorder);
10153 }
10154
10155 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10156 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10157 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10158 OldRHS.getOperand(0));
10159 }
10160 }
10161 return SDValue();
10162}
10163
10164SDValue DAGCombiner::visitSRA(SDNode *N) {
10165 SDValue N0 = N->getOperand(0);
10166 SDValue N1 = N->getOperand(1);
10167 if (SDValue V = DAG.simplifyShift(N0, N1))
10168 return V;
10169
10170 EVT VT = N0.getValueType();
10171 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10172
10173 // fold (sra c1, c2) -> c1 >>s c2
10174 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
10175 return C;
10176
10177 // Arithmetic shifting an all-sign-bit value is a no-op.
10178 // fold (sra 0, x) -> 0
10179 // fold (sra -1, x) -> -1
10180 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10181 return N0;
10182
10183 // fold vector ops
10184 if (VT.isVector())
10185 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10186 return FoldedVOp;
10187
10188 if (SDValue NewSel = foldBinOpIntoSelect(N))
10189 return NewSel;
10190
10191 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10192
10193 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10194 // clamp (add c1, c2) to max shift.
10195 if (N0.getOpcode() == ISD::SRA) {
10196 SDLoc DL(N);
10197 EVT ShiftVT = N1.getValueType();
10198 EVT ShiftSVT = ShiftVT.getScalarType();
10199 SmallVector<SDValue, 16> ShiftValues;
10200
10201 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10202 APInt c1 = LHS->getAPIntValue();
10203 APInt c2 = RHS->getAPIntValue();
10204 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10205 APInt Sum = c1 + c2;
10206 unsigned ShiftSum =
10207 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10208 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10209 return true;
10210 };
10211 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10212 SDValue ShiftValue;
10213 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10214 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10215 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10216 assert(ShiftValues.size() == 1 &&
10217 "Expected matchBinaryPredicate to return one element for "
10218 "SPLAT_VECTORs");
10219 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10220 } else
10221 ShiftValue = ShiftValues[0];
10222 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10223 }
10224 }
10225
10226 // fold (sra (shl X, m), (sub result_size, n))
10227 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10228 // result_size - n != m.
10229 // If truncate is free for the target, sext(shl) is likely to result in better
10230 // code.
10231 if (N0.getOpcode() == ISD::SHL && N1C) {
10232 // Get the two constants of the shifts, CN0 = m, CN = n.
10233 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10234 if (N01C) {
10235 LLVMContext &Ctx = *DAG.getContext();
10236 // Determine what the truncate's result bitsize and type would be.
10237 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10238
10239 if (VT.isVector())
10240 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10241
10242 // Determine the residual right-shift amount.
10243 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10244
10245 // If the shift is not a no-op (in which case this should be just a sign
10246 // extend already), the truncated to type is legal, sign_extend is legal
10247 // on that type, and the truncate to that type is both legal and free,
10248 // perform the transform.
10249 if ((ShiftAmt > 0) &&
10250 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10251 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10252 TLI.isTruncateFree(VT, TruncVT)) {
10253 SDLoc DL(N);
10254 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10255 getShiftAmountTy(N0.getOperand(0).getValueType()));
10256 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10257 N0.getOperand(0), Amt);
10258 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10259 Shift);
10260 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10261 N->getValueType(0), Trunc);
10262 }
10263 }
10264 }
10265
10266 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10267 // sra (add (shl X, N1C), AddC), N1C -->
10268 // sext (add (trunc X to (width - N1C)), AddC')
10269 // sra (sub AddC, (shl X, N1C)), N1C -->
10270 // sext (sub AddC1',(trunc X to (width - N1C)))
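// Illustrative i32 instance with N1C == 16:
//   sra (add (shl X, 16), 0x00050000), 16 --> sext (add (trunc X to i16), 5)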
10271 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10272 N0.hasOneUse()) {
10273 bool IsAdd = N0.getOpcode() == ISD::ADD;
10274 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10275 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10276 Shl.hasOneUse()) {
10277 // TODO: AddC does not need to be a splat.
10278 if (ConstantSDNode *AddC =
10279 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10280 // Determine what the truncate's type would be and ask the target if
10281 // that is a free operation.
10282 LLVMContext &Ctx = *DAG.getContext();
10283 unsigned ShiftAmt = N1C->getZExtValue();
10284 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10285 if (VT.isVector())
10286 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10287
10288 // TODO: The simple type check probably belongs in the default hook
10289 // implementation and/or target-specific overrides (because
10290 // non-simple types likely require masking when legalized), but
10291 // that restriction may conflict with other transforms.
10292 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10293 TLI.isTruncateFree(VT, TruncVT)) {
10294 SDLoc DL(N);
10295 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10296 SDValue ShiftC =
10297 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10298 TruncVT.getScalarSizeInBits()),
10299 DL, TruncVT);
10300 SDValue Add;
10301 if (IsAdd)
10302 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10303 else
10304 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10305 return DAG.getSExtOrTrunc(Add, DL, VT);
10306 }
10307 }
10308 }
10309 }
10310
10311 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10312 if (N1.getOpcode() == ISD::TRUNCATE &&
10313 N1.getOperand(0).getOpcode() == ISD::AND) {
10314 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10315 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
10316 }
10317
10318 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10319 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10320 // if c1 is equal to the number of bits the trunc removes
10321 // TODO - support non-uniform vector shift amounts.
10322 if (N0.getOpcode() == ISD::TRUNCATE &&
10323 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10324 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10325 N0.getOperand(0).hasOneUse() &&
10326 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10327 SDValue N0Op0 = N0.getOperand(0);
10328 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10329 EVT LargeVT = N0Op0.getValueType();
10330 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10331 if (LargeShift->getAPIntValue() == TruncBits) {
10332 SDLoc DL(N);
10333 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10334 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10335 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10336 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10337 SDValue SRA =
10338 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10339 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10340 }
10341 }
10342 }
10343
10344 // Simplify, based on bits shifted out of the LHS.
10345 if (SimplifyDemandedBits(SDValue(N, 0)))
10346 return SDValue(N, 0);
10347
10348 // If the sign bit is known to be zero, switch this to a SRL.
10349 if (DAG.SignBitIsZero(N0))
10350 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
10351
10352 if (N1C && !N1C->isOpaque())
10353 if (SDValue NewSRA = visitShiftByConstant(N))
10354 return NewSRA;
10355
10356 // Try to transform this shift into a multiply-high if
10357 // it matches the appropriate pattern detected in combineShiftToMULH.
10358 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10359 return MULH;
10360
10361 // Attempt to convert a sra of a load into a narrower sign-extending load.
10362 if (SDValue NarrowLoad = reduceLoadWidth(N))
10363 return NarrowLoad;
10364
10365 return SDValue();
10366}
10367
10368SDValue DAGCombiner::visitSRL(SDNode *N) {
10369 SDValue N0 = N->getOperand(0);
10370 SDValue N1 = N->getOperand(1);
10371 if (SDValue V = DAG.simplifyShift(N0, N1))
10372 return V;
10373
10374 EVT VT = N0.getValueType();
10375 EVT ShiftVT = N1.getValueType();
10376 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10377
10378 // fold (srl c1, c2) -> c1 >>u c2
10379 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
10380 return C;
10381
10382 // fold vector ops
10383 if (VT.isVector())
10384 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10385 return FoldedVOp;
10386
10387 if (SDValue NewSel = foldBinOpIntoSelect(N))
10388 return NewSel;
10389
10390 // if (srl x, c) is known to be zero, return 0
10391 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10392 if (N1C &&
10393 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10394 return DAG.getConstant(0, SDLoc(N), VT);
10395
10396 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10397 if (N0.getOpcode() == ISD::SRL) {
10398 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10399 ConstantSDNode *RHS) {
10400 APInt c1 = LHS->getAPIntValue();
10401 APInt c2 = RHS->getAPIntValue();
10402 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10403 return (c1 + c2).uge(OpSizeInBits);
10404 };
10405 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10406 return DAG.getConstant(0, SDLoc(N), VT);
10407
10408 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10409 ConstantSDNode *RHS) {
10410 APInt c1 = LHS->getAPIntValue();
10411 APInt c2 = RHS->getAPIntValue();
10412 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10413 return (c1 + c2).ult(OpSizeInBits);
10414 };
10415 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10416 SDLoc DL(N);
10417 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10418 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10419 }
10420 }
10421
10422 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10423 N0.getOperand(0).getOpcode() == ISD::SRL) {
10424 SDValue InnerShift = N0.getOperand(0);
10425 // TODO - support non-uniform vector shift amounts.
10426 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10427 uint64_t c1 = N001C->getZExtValue();
10428 uint64_t c2 = N1C->getZExtValue();
10429 EVT InnerShiftVT = InnerShift.getValueType();
10430 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10431 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10432 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10433 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10434 if (c1 + OpSizeInBits == InnerShiftSize) {
10435 SDLoc DL(N);
10436 if (c1 + c2 >= InnerShiftSize)
10437 return DAG.getConstant(0, DL, VT);
10438 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10439 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10440 InnerShift.getOperand(0), NewShiftAmt);
10441 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10442 }
10443 // In the more general case, we can clear the high bits after the shift:
10444 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10445 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10446 c1 + c2 < InnerShiftSize) {
10447 SDLoc DL(N);
10448 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10449 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10450 InnerShift.getOperand(0), NewShiftAmt);
10451 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10452 OpSizeInBits - c2),
10453 DL, InnerShiftVT);
10454 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10455 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10456 }
10457 }
10458 }
10459
10460 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10461 // (and (srl x, (sub c2, c1)), MASK)
10462 if (N0.getOpcode() == ISD::SHL &&
10463 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10464 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10465 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10466 ConstantSDNode *RHS) {
10467 const APInt &LHSC = LHS->getAPIntValue();
10468 const APInt &RHSC = RHS->getAPIntValue();
10469 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10470 LHSC.getZExtValue() <= RHSC.getZExtValue();
10471 };
10472 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10473 /*AllowUndefs*/ false,
10474 /*AllowTypeMismatch*/ true)) {
10475 SDLoc DL(N);
10476 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10477 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10478 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10479 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10480 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10481 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10482 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10483 }
10484 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10485 /*AllowUndefs*/ false,
10486 /*AllowTypeMismatch*/ true)) {
10487 SDLoc DL(N);
10488 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10489 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10490 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10491 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10492 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10493 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10494 }
10495 }
10496
10497 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10498 // TODO - support non-uniform vector shift amounts.
10499 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10500 // Shifting in all undef bits?
10501 EVT SmallVT = N0.getOperand(0).getValueType();
10502 unsigned BitSize = SmallVT.getScalarSizeInBits();
10503 if (N1C->getAPIntValue().uge(BitSize))
10504 return DAG.getUNDEF(VT);
10505
10506 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10507 uint64_t ShiftAmt = N1C->getZExtValue();
10508 SDLoc DL0(N0);
10509 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10510 N0.getOperand(0),
10511 DAG.getConstant(ShiftAmt, DL0,
10512 getShiftAmountTy(SmallVT)));
10513 AddToWorklist(SmallShift.getNode());
10514 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10515 SDLoc DL(N);
10516 return DAG.getNode(ISD::AND, DL, VT,
10517 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10518 DAG.getConstant(Mask, DL, VT));
10519 }
10520 }
10521
10522 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10523 // bit, which is unmodified by sra.
10524 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10525 if (N0.getOpcode() == ISD::SRA)
10526 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
10527 }
10528
10529 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10530 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10531 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10532 isPowerOf2_32(OpSizeInBits) &&
10533 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10534 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10535
10536 // If any of the input bits are KnownOne, then the input couldn't be all
10537 // zeros, thus the result of the srl will always be zero.
10538 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10539
10540 // If all of the bits input to the ctlz node are known to be zero, then
10541 // the result of the ctlz is "32" and the result of the shift is one.
10542 APInt UnknownBits = ~Known.Zero;
10543 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10544
10545 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10546 if (UnknownBits.isPowerOf2()) {
10547 // Okay, we know that only the single bit specified by UnknownBits
10548 // could be set on input to the CTLZ node. If this bit is set, the SRL
10549 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10550 // to an SRL/XOR pair, which is likely to simplify more.
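// For instance (illustrative): if only bit 3 of the i32 input can be set,
// (srl (ctlz x), 5) is 1 exactly when x == 0, i.e. (xor (srl x, 3), 1).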
10551 unsigned ShAmt = UnknownBits.countr_zero();
10552 SDValue Op = N0.getOperand(0);
10553
10554 if (ShAmt) {
10555 SDLoc DL(N0);
10556 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10557 DAG.getConstant(ShAmt, DL,
10558 getShiftAmountTy(Op.getValueType())));
10559 AddToWorklist(Op.getNode());
10560 }
10561
10562 SDLoc DL(N);
10563 return DAG.getNode(ISD::XOR, DL, VT,
10564 Op, DAG.getConstant(1, DL, VT));
10565 }
10566 }
10567
10568 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10569 if (N1.getOpcode() == ISD::TRUNCATE &&
10570 N1.getOperand(0).getOpcode() == ISD::AND) {
10571 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10572 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
10573 }
10574
10575 // fold operands of srl based on knowledge that the low bits are not
10576 // demanded.
10577 if (SimplifyDemandedBits(SDValue(N, 0)))
10578 return SDValue(N, 0);
10579
10580 if (N1C && !N1C->isOpaque())
10581 if (SDValue NewSRL = visitShiftByConstant(N))
10582 return NewSRL;
10583
10584 // Attempt to convert a srl of a load into a narrower zero-extending load.
10585 if (SDValue NarrowLoad = reduceLoadWidth(N))
10586 return NarrowLoad;
10587
10588 // Here is a common situation. We want to optimize:
10589 //
10590 // %a = ...
10591 // %b = and i32 %a, 2
10592 // %c = srl i32 %b, 1
10593 // brcond i32 %c ...
10594 //
10595 // into
10596 //
10597 // %a = ...
10598 // %b = and %a, 2
10599 // %c = setcc eq %b, 0
10600 // brcond %c ...
10601 //
10602 // However, after the source operand of the SRL is optimized into an AND, the SRL
10603 // itself may not be optimized further. Look for it and add the BRCOND into
10604 // the worklist.
10605 //
10606 // This also tends to happen for binary operations when SimplifyDemandedBits
10607 // is involved.
10608 //
10609 // FIXME: This is unnecessary if we process the DAG in topological order,
10610 // which we plan to do. This workaround can be removed once the DAG is
10611 // processed in topological order.
10612 if (N->hasOneUse()) {
10613 SDNode *Use = *N->use_begin();
10614
10615 // Look past the truncate.
10616 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10617 Use = *Use->use_begin();
10618
10619 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10620 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10621 AddToWorklist(Use);
10622 }
10623
10624 // Try to transform this shift into a multiply-high if
10625 // it matches the appropriate pattern detected in combineShiftToMULH.
10626 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10627 return MULH;
10628
10629 return SDValue();
10630}
10631
10632SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10633 EVT VT = N->getValueType(0);
10634 SDValue N0 = N->getOperand(0);
10635 SDValue N1 = N->getOperand(1);
10636 SDValue N2 = N->getOperand(2);
10637 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10638 unsigned BitWidth = VT.getScalarSizeInBits();
10639
10640 // fold (fshl N0, N1, 0) -> N0
10641 // fold (fshr N0, N1, 0) -> N1
10642 if (isPowerOf2_32(BitWidth))
10643 if (DAG.MaskedValueIsZero(
10644 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10645 return IsFSHL ? N0 : N1;
10646
10647 auto IsUndefOrZero = [](SDValue V) {
10648 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10649 };
10650
10651 // TODO - support non-uniform vector shift amounts.
10652 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10653 EVT ShAmtTy = N2.getValueType();
10654
10655 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10656 if (Cst->getAPIntValue().uge(BitWidth)) {
10657 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10658 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10659 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10660 }
10661
10662 unsigned ShAmt = Cst->getZExtValue();
10663 if (ShAmt == 0)
10664 return IsFSHL ? N0 : N1;
10665
10666 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10667 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10668 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10669 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
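// Illustrative check (i32): fshl(0, N1, 8) == (0 << 8) | (N1 >> 24), which
// matches the lshr(N1, BW - C) form above.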
10670 if (IsUndefOrZero(N0))
10671 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10672 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10673 SDLoc(N), ShAmtTy));
10674 if (IsUndefOrZero(N1))
10675 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10676 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10677 SDLoc(N), ShAmtTy));
10678
10679 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10680 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10681 // TODO - bigendian support once we have test coverage.
10682 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10683 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10684 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10685 !DAG.getDataLayout().isBigEndian()) {
10686 auto *LHS = dyn_cast<LoadSDNode>(N0);
10687 auto *RHS = dyn_cast<LoadSDNode>(N1);
10688 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10689 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10690 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10691 ISD::isNON_EXTLoad(LHS)) {
10692 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10693 SDLoc DL(RHS);
10694 uint64_t PtrOff =
10695 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10696 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10697 unsigned Fast = 0;
10698 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10699 RHS->getAddressSpace(), NewAlign,
10700 RHS->getMemOperand()->getFlags(), &Fast) &&
10701 Fast) {
10702 SDValue NewPtr = DAG.getMemBasePlusOffset(
10703 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10704 AddToWorklist(NewPtr.getNode());
10705 SDValue Load = DAG.getLoad(
10706 VT, DL, RHS->getChain(), NewPtr,
10707 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10708 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10709 // Replace the old load's chain with the new load's chain.
10710 WorklistRemover DeadNodes(*this);
10711 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10712 return Load;
10713 }
10714 }
10715 }
10716 }
10717 }
10718
10719 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10720 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10721 // iff we know the shift amount is in range.
10722 // TODO: when is it worth doing SUB(BW, N2) as well?
10723 if (isPowerOf2_32(BitWidth)) {
10724 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10725 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10726 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10727 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10728 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10729 }
10730
10731 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10732 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10733 // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
10734 // is legal as well we might be better off avoiding non-constant (BW - N2).
10735 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10736 if (N0 == N1 && hasOperation(RotOpc, VT))
10737 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10738
10739 // Simplify, based on bits shifted out of N0/N1.
10740 if (SimplifyDemandedBits(SDValue(N, 0)))
10741 return SDValue(N, 0);
10742
10743 return SDValue();
10744}
10745
10746SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10747 SDValue N0 = N->getOperand(0);
10748 SDValue N1 = N->getOperand(1);
10749 if (SDValue V = DAG.simplifyShift(N0, N1))
10750 return V;
10751
10752 EVT VT = N0.getValueType();
10753
10754 // fold (*shlsat c1, c2) -> c1<<c2
10755 if (SDValue C =
10756 DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
10757 return C;
10758
10759 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10760
10761 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10762 // fold (sshlsat x, c) -> (shl x, c)
10763 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10764 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10765 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10766
10767 // fold (ushlsat x, c) -> (shl x, c)
10768 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10769 N1C->getAPIntValue().ule(
10770 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10771 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10772 }
10773
10774 return SDValue();
10775}
10776
10777 // Given an ABS node, detect the following patterns:
10778// (ABS (SUB (EXTEND a), (EXTEND b))).
10779// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10780// Generates UABD/SABD instruction.
10781SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10782 EVT SrcVT = N->getValueType(0);
10783
10784 if (N->getOpcode() == ISD::TRUNCATE)
10785 N = N->getOperand(0).getNode();
10786
10787 if (N->getOpcode() != ISD::ABS)
10788 return SDValue();
10789
10790 EVT VT = N->getValueType(0);
10791 SDValue AbsOp1 = N->getOperand(0);
10792 SDValue Op0, Op1;
10793
10794 if (AbsOp1.getOpcode() != ISD::SUB)
10795 return SDValue();
10796
10797 Op0 = AbsOp1.getOperand(0);
10798 Op1 = AbsOp1.getOperand(1);
10799
10800 unsigned Opc0 = Op0.getOpcode();
10801
10802 // Check if the operands of the sub are (zero|sign)-extended.
10803 // TODO: Should we use ValueTracking instead?
10804 if (Opc0 != Op1.getOpcode() ||
10805 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10806 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10807 // fold (abs (sub nsw x, y)) -> abds(x, y)
10808 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10809 TLI.preferABDSToABSWithNSW(VT)) {
10810 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10811 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10812 }
10813 return SDValue();
10814 }
10815
10816 EVT VT0, VT1;
10817 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10818 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10819 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10820 } else {
10821 VT0 = Op0.getOperand(0).getValueType();
10822 VT1 = Op1.getOperand(0).getValueType();
10823 }
10824 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10825
10826 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10827 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10828 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10829 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10830 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10831 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10832 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10833 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10834 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10835 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10836 }
10837
10838 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10839 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10840 if (hasOperation(ABDOpcode, VT)) {
10841 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10842 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10843 }
10844
10845 return SDValue();
10846}
10847
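
// Illustrative sketch (hypothetical helpers, not part of LLVM): a scalar model
// of the ABDS fold performed by foldABSToABD above. abs(sext(a) - sext(b))
// computed in a wider type equals the absolute difference of the narrow values,
// zero-extended back, which is what the ISD::ABDS rewrite relies on.
#include <cstdint>

static uint16_t sketchViaWideSub(int8_t A, int8_t B) {
  int16_t Diff = int16_t(A) - int16_t(B); // sext(a) - sext(b); cannot overflow in i16
  return uint16_t(Diff < 0 ? -Diff : Diff);
}

static uint16_t sketchViaAbds(int8_t A, int8_t B) {
  int Diff = int(A) - int(B);
  uint8_t Abd = uint8_t(Diff < 0 ? -Diff : Diff); // abds(a, b) on i8
  return uint16_t(Abd);                           // zext back to i16
}
// For all int8_t A, B: sketchViaWideSub(A, B) == sketchViaAbds(A, B).
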
10848SDValue DAGCombiner::visitABS(SDNode *N) {
10849 SDValue N0 = N->getOperand(0);
10850 EVT VT = N->getValueType(0);
10851 SDLoc DL(N);
10852
10853 // fold (abs c1) -> c2
10854 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10855 return C;
10856 // fold (abs (abs x)) -> (abs x)
10857 if (N0.getOpcode() == ISD::ABS)
10858 return N0;
10859 // fold (abs x) -> x iff not-negative
10860 if (DAG.SignBitIsZero(N0))
10861 return N0;
10862
10863 if (SDValue ABD = foldABSToABD(N, DL))
10864 return ABD;
10865
10866 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10867 // iff zero_extend/truncate are free.
10868 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10869 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10870 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10871 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10872 hasOperation(ISD::ABS, ExtVT)) {
10873 return DAG.getNode(
10874 ISD::ZERO_EXTEND, DL, VT,
10875 DAG.getNode(ISD::ABS, DL, ExtVT,
10876 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10877 }
10878 }
10879
10880 return SDValue();
10881}
10882
10883SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10884 SDValue N0 = N->getOperand(0);
10885 EVT VT = N->getValueType(0);
10886 SDLoc DL(N);
10887
10888 // fold (bswap c1) -> c2
10889 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
10890 return C;
10891 // fold (bswap (bswap x)) -> x
10892 if (N0.getOpcode() == ISD::BSWAP)
10893 return N0.getOperand(0);
10894
10895 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
10896 // isn't supported, it will be expanded to bswap followed by a manual reversal
10897 // of bits in each byte. By placing bswaps before bitreverse, we can remove
10898 // the two bswaps if the bitreverse gets expanded.
10899 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
10900 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10901 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
10902 }
10903
10904 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
10905 // iff c >= bw/2 (i.e. lower half is known zero)
10906 unsigned BW = VT.getScalarSizeInBits();
10907 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
10908 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10909 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
10910 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10911 ShAmt->getZExtValue() >= (BW / 2) &&
10912 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
10913 TLI.isTruncateFree(VT, HalfVT) &&
10914 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
10915 SDValue Res = N0.getOperand(0);
10916 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
10917 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
10918 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
10919 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
10920 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
10921 return DAG.getZExtOrTrunc(Res, DL, VT);
10922 }
10923 }
10924
10925 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
10926 // inverse-shift-of-bswap:
10927 // bswap (X u<< C) --> (bswap X) u>> C
10928 // bswap (X u>> C) --> (bswap X) u<< C
10929 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10930 N0.hasOneUse()) {
10931 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10932 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
10933 ShAmt->getZExtValue() % 8 == 0) {
10934 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
10935 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
10936 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
10937 }
10938 }
10939
10940 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
10941 return V;
10942
10943 return SDValue();
10944}
10945
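
// Illustrative sketch (hypothetical helper, not part of LLVM): a scalar model of
// the bswap-of-shift canonicalization in visitBSWAP above. For a shift amount
// that is a multiple of 8, byte-swapping a shifted value is the same as shifting
// the byte-swapped value in the opposite direction.
#include <cstdint>

static uint32_t sketchBswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0x0000FF00u) | ((X << 8) & 0x00FF0000u) |
         (X << 24);
}
// For any uint32_t X and C in {0, 8, 16, 24}:
//   sketchBswap32(X << C) == sketchBswap32(X) >> C
//   sketchBswap32(X >> C) == sketchBswap32(X) << C
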
10946SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
10947 SDValue N0 = N->getOperand(0);
10948 EVT VT = N->getValueType(0);
10949 SDLoc DL(N);
10950
10951 // fold (bitreverse c1) -> c2
10952 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
10953 return C;
10954 // fold (bitreverse (bitreverse x)) -> x
10955 if (N0.getOpcode() == ISD::BITREVERSE)
10956 return N0.getOperand(0);
10957 return SDValue();
10958}
10959
10960SDValue DAGCombiner::visitCTLZ(SDNode *N) {
10961 SDValue N0 = N->getOperand(0);
10962 EVT VT = N->getValueType(0);
10963 SDLoc DL(N);
10964
10965 // fold (ctlz c1) -> c2
10966 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
10967 return C;
10968
10969 // If the value is known never to be zero, switch to the undef version.
10970 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
10971 if (DAG.isKnownNeverZero(N0))
10972 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
10973
10974 return SDValue();
10975}
10976
10977SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
10978 SDValue N0 = N->getOperand(0);
10979 EVT VT = N->getValueType(0);
10980 SDLoc DL(N);
10981
10982 // fold (ctlz_zero_undef c1) -> c2
10983 if (SDValue C =
10984 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
10985 return C;
10986 return SDValue();
10987}
10988
10989SDValue DAGCombiner::visitCTTZ(SDNode *N) {
10990 SDValue N0 = N->getOperand(0);
10991 EVT VT = N->getValueType(0);
10992 SDLoc DL(N);
10993
10994 // fold (cttz c1) -> c2
10995 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
10996 return C;
10997
10998 // If the value is known never to be zero, switch to the undef version.
10999 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11000 if (DAG.isKnownNeverZero(N0))
11001 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11002
11003 return SDValue();
11004}
11005
11006SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11007 SDValue N0 = N->getOperand(0);
11008 EVT VT = N->getValueType(0);
11009 SDLoc DL(N);
11010
11011 // fold (cttz_zero_undef c1) -> c2
11012 if (SDValue C =
11013 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11014 return C;
11015 return SDValue();
11016}
11017
11018SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11019 SDValue N0 = N->getOperand(0);
11020 EVT VT = N->getValueType(0);
11021 unsigned NumBits = VT.getScalarSizeInBits();
11022 SDLoc DL(N);
11023
11024 // fold (ctpop c1) -> c2
11025 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11026 return C;
11027
11028 // If the source is being shifted, but doesn't affect any active bits,
11029 // then we can call CTPOP on the shift source directly.
11030 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11031 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11032 const APInt &Amt = AmtC->getAPIntValue();
11033 if (Amt.ult(NumBits)) {
11034 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11035 if ((N0.getOpcode() == ISD::SRL &&
11036 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11037 (N0.getOpcode() == ISD::SHL &&
11038 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11039 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11040 }
11041 }
11042 }
11043 }
11044
11045 // If the upper bits are known to be zero, then see if it's profitable to
11046 // only count the lower bits.
11047 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11048 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11049 if (hasOperation(ISD::CTPOP, HalfVT) &&
11050 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11051 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11052 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11053 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11054 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11055 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11056 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11057 }
11058 }
11059 }
11060
11061 return SDValue();
11062}
11063
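
// Illustrative sketch (hypothetical helper, not part of LLVM): a scalar model of
// the CTPOP narrowing in visitCTPOP above. If the upper half of the value is
// known to be zero, counting bits in the truncated lower half gives the same
// result, so a cheaper half-width CTPOP can be used.
#include <cstdint>

static unsigned sketchPopcount(uint64_t X) {
  unsigned Count = 0;
  for (; X != 0; X &= X - 1) // clear the lowest set bit each iteration
    ++Count;
  return Count;
}
// If (X & 0xFFFFFFFF00000000ULL) == 0 then
//   sketchPopcount(X) == sketchPopcount(uint64_t(uint32_t(X))).
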
11064// FIXME: This should be checking for no signed zeros on individual operands, as
11065// well as no nans.
11066 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11067 SDValue RHS,
11068 const TargetLowering &TLI) {
11069 const TargetOptions &Options = DAG.getTarget().Options;
11070 EVT VT = LHS.getValueType();
11071
11072 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11075}
11076
11077 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11078 SDValue RHS, SDValue True, SDValue False,
11079 ISD::CondCode CC,
11080 const TargetLowering &TLI,
11081 SelectionDAG &DAG) {
11082 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11083 switch (CC) {
11084 case ISD::SETOLT:
11085 case ISD::SETOLE:
11086 case ISD::SETLT:
11087 case ISD::SETLE:
11088 case ISD::SETULT:
11089 case ISD::SETULE: {
11090 // Since it's known never nan to get here already, either fminnum or
11091 // fminnum_ieee are OK. Try the ieee version first, since fminnum is
11092 // expanded in terms of it.
11093 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11094 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11095 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11096
11097 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11098 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11099 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11100 return SDValue();
11101 }
11102 case ISD::SETOGT:
11103 case ISD::SETOGE:
11104 case ISD::SETGT:
11105 case ISD::SETGE:
11106 case ISD::SETUGT:
11107 case ISD::SETUGE: {
11108 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11109 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11110 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11111
11112 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11113 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11114 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11115 return SDValue();
11116 }
11117 default:
11118 return SDValue();
11119 }
11120}
11121
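
// Illustrative sketch (hypothetical helper, not part of LLVM): the scalar
// identity behind combineMinNumMaxNumImpl above. When neither operand is a NaN,
// a compare+select agrees with fminnum/fmaxnum semantics; e.g. for the SETLT
// case:
#include <cmath>

static double sketchSelectMin(double X, double Y) {
  return X < Y ? X : Y; // select (setcc X, Y, setlt), X, Y
}
// For all X, Y with !std::isnan(X) && !std::isnan(Y):
//   sketchSelectMin(X, Y) == std::fmin(X, Y)
// (up to the sign of zero, which is why the caller also requires
// NoSignedZerosFPMath).
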
11122/// Generate Min/Max node
11123SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11124 SDValue RHS, SDValue True,
11125 SDValue False, ISD::CondCode CC) {
11126 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11127 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11128
11129 // If we can't directly match this, try to see if we can pull an fneg out of
11130 // the select.
11131 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11132 True, DAG, LegalOperations, ForCodeSize);
11133 if (!NegTrue)
11134 return SDValue();
11135
11136 HandleSDNode NegTrueHandle(NegTrue);
11137
11138 // Try to unfold an fneg from the select if we are comparing the negated
11139 // constant.
11140 //
11141 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11142 //
11143 // TODO: Handle fabs
11144 if (LHS == NegTrue) {
11145 // If we can't directly match this, try to see if we can pull an fneg out of
11146 // the select.
11147 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11148 RHS, DAG, LegalOperations, ForCodeSize);
11149 if (NegRHS) {
11150 HandleSDNode NegRHSHandle(NegRHS);
11151 if (NegRHS == False) {
11152 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11153 False, CC, TLI, DAG);
11154 if (Combined)
11155 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11156 }
11157 }
11158 }
11159
11160 return SDValue();
11161}
11162
11163/// If a (v)select has a condition value that is a sign-bit test, try to smear
11164/// the condition operand sign-bit across the value width and use it as a mask.
11165 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
11166 SDValue Cond = N->getOperand(0);
11167 SDValue C1 = N->getOperand(1);
11168 SDValue C2 = N->getOperand(2);
11169 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11170 return SDValue();
11171
11172 EVT VT = N->getValueType(0);
11173 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11174 VT != Cond.getOperand(0).getValueType())
11175 return SDValue();
11176
11177 // The inverted-condition + commuted-select variants of these patterns are
11178 // canonicalized to these forms in IR.
11179 SDValue X = Cond.getOperand(0);
11180 SDValue CondC = Cond.getOperand(1);
11181 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11182 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11183 isAllOnesOrAllOnesSplat(C2)) {
11184 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11185 SDLoc DL(N);
11186 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11187 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11188 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11189 }
11190 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11191 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11192 SDLoc DL(N);
11193 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11194 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11195 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11196 }
11197 return SDValue();
11198}
11199
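
// Illustrative sketch (hypothetical helpers, not part of LLVM): a scalar model
// of the sign-bit smear used by foldSelectOfConstantsUsingSra above. An
// arithmetic right shift by BW-1 turns the sign bit into an all-zeros or
// all-ones mask, so the select becomes a plain OR (or AND). Assumes arithmetic
// right shift of signed values, which C++20 guarantees.
#include <cstdint>

static int32_t sketchSignbitSelect(int32_t X, int32_t C1) {
  return X > -1 ? C1 : -1; // i32 X > -1 ? C1 : -1
}

static int32_t sketchSignbitSra(int32_t X, int32_t C1) {
  return (X >> 31) | C1;   // (X >>s 31) | C1
}
// For all X, C1: sketchSignbitSelect(X, C1) == sketchSignbitSra(X, C1).
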
11200 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11201 const TargetLowering &TLI) {
11202 if (!TLI.convertSelectOfConstantsToMath(VT))
11203 return false;
11204
11205 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11206 return true;
11208 return true;
11209
11210 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11211 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11212 return true;
11213 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11214 return true;
11215
11216 return false;
11217}
11218
11219SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11220 SDValue Cond = N->getOperand(0);
11221 SDValue N1 = N->getOperand(1);
11222 SDValue N2 = N->getOperand(2);
11223 EVT VT = N->getValueType(0);
11224 EVT CondVT = Cond.getValueType();
11225 SDLoc DL(N);
11226
11227 if (!VT.isInteger())
11228 return SDValue();
11229
11230 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11231 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11232 if (!C1 || !C2)
11233 return SDValue();
11234
11235 if (CondVT != MVT::i1 || LegalOperations) {
11236 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11237 // We can't do this reliably if integer based booleans have different contents
11238 // to floating point based booleans. This is because we can't tell whether we
11239 // have an integer-based boolean or a floating-point-based boolean unless we
11240 // can find the SETCC that produced it and inspect its operands. This is
11241 // fairly easy if C is the SETCC node, but it can potentially be
11242 // undiscoverable (or not reasonably discoverable). For example, it could be
11243 // in another basic block or it could require searching a complicated
11244 // expression.
11245 if (CondVT.isInteger() &&
11246 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11247 TargetLowering::ZeroOrOneBooleanContent &&
11248 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11249 TargetLowering::ZeroOrOneBooleanContent &&
11250 C1->isZero() && C2->isOne()) {
11251 SDValue NotCond =
11252 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11253 if (VT.bitsEq(CondVT))
11254 return NotCond;
11255 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11256 }
11257
11258 return SDValue();
11259 }
11260
11261 // Only do this before legalization to avoid conflicting with target-specific
11262 // transforms in the other direction (create a select from a zext/sext). There
11263 // is also a target-independent combine here in DAGCombiner in the other
11264 // direction for (select Cond, -1, 0) when the condition is not i1.
11265 assert(CondVT == MVT::i1 && !LegalOperations);
11266
11267 // select Cond, 1, 0 --> zext (Cond)
11268 if (C1->isOne() && C2->isZero())
11269 return DAG.getZExtOrTrunc(Cond, DL, VT);
11270
11271 // select Cond, -1, 0 --> sext (Cond)
11272 if (C1->isAllOnes() && C2->isZero())
11273 return DAG.getSExtOrTrunc(Cond, DL, VT);
11274
11275 // select Cond, 0, 1 --> zext (!Cond)
11276 if (C1->isZero() && C2->isOne()) {
11277 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11278 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11279 return NotCond;
11280 }
11281
11282 // select Cond, 0, -1 --> sext (!Cond)
11283 if (C1->isZero() && C2->isAllOnes()) {
11284 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11285 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11286 return NotCond;
11287 }
11288
11289 // Use a target hook because some targets may prefer to transform in the
11290 // other direction.
11291 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11292 return SDValue();
11293
11294 // For any constants that differ by 1, we can transform the select into
11295 // an extend and add.
11296 const APInt &C1Val = C1->getAPIntValue();
11297 const APInt &C2Val = C2->getAPIntValue();
11298
11299 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11300 if (C1Val - 1 == C2Val) {
11301 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11302 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11303 }
11304
11305 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11306 if (C1Val + 1 == C2Val) {
11307 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11308 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11309 }
11310
11311 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11312 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11313 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11314 SDValue ShAmtC =
11315 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11316 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11317 }
11318
11319 // select Cond, -1, C --> or (sext Cond), C
11320 if (C1->isAllOnes()) {
11321 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11322 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11323 }
11324
11325 // select Cond, C, -1 --> or (sext (not Cond)), C
11326 if (C2->isAllOnes()) {
11327 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11328 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11329 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11330 }
11331
11332 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
11333 return V;
11334
11335 return SDValue();
11336}
11337
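
// Illustrative sketch (hypothetical helpers, not part of LLVM): scalar models
// of two of the select-of-constants rewrites in foldSelectOfConstants above.
#include <cstdint>

// select Cond, C1, C1-1 --> add (zext Cond), C1-1
static uint32_t sketchSelAdjacent(bool Cond, uint32_t C1) {
  return (C1 - 1) + uint32_t(Cond);
}

// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
static uint32_t sketchSelPow2(bool Cond, unsigned Log2Pow2) {
  return uint32_t(Cond) << Log2Pow2;
}
// sketchSelAdjacent(Cond, C1) == (Cond ? C1 : C1 - 1)
// sketchSelPow2(Cond, K)      == (Cond ? (1u << K) : 0u)
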
11338template <class MatchContextClass>
11339 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11340 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11341 N->getOpcode() == ISD::VP_SELECT) &&
11342 "Expected a (v)(vp.)select");
11343 SDValue Cond = N->getOperand(0);
11344 SDValue T = N->getOperand(1), F = N->getOperand(2);
11345 EVT VT = N->getValueType(0);
11346 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11347 MatchContextClass matcher(DAG, TLI, N);
11348
11349 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11350 return SDValue();
11351
11352 // select Cond, Cond, F --> or Cond, F
11353 // select Cond, 1, F --> or Cond, F
11354 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11355 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11356
11357 // select Cond, T, Cond --> and Cond, T
11358 // select Cond, T, 0 --> and Cond, T
11359 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11360 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11361
11362 // select Cond, T, 1 --> or (not Cond), T
11363 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11364 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11365 DAG.getAllOnesConstant(SDLoc(N), VT));
11366 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11367 }
11368
11369 // select Cond, 0, F --> and (not Cond), F
11370 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11371 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11372 DAG.getAllOnesConstant(SDLoc(N), VT));
11373 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11374 }
11375
11376 return SDValue();
11377}
11378
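
// Illustrative sketch (hypothetical helper, not part of LLVM): a truth-value
// model of the boolean-select folds in foldBoolSelectToLogic above, with i1
// modeled as bool.
static bool sketchSelectI1(bool Cond, bool T, bool F) { return Cond ? T : F; }
// select Cond, true, F  == Cond | F
// select Cond, T, false == Cond & T
// select Cond, T, true  == !Cond | T
// select Cond, false, F == !Cond & F
// e.g. sketchSelectI1(C, T, false) == (C && T) for all C, T.
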
11379 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11380 SDValue N0 = N->getOperand(0);
11381 SDValue N1 = N->getOperand(1);
11382 SDValue N2 = N->getOperand(2);
11383 EVT VT = N->getValueType(0);
11384 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11385 return SDValue();
11386
11387 SDValue Cond0 = N0.getOperand(0);
11388 SDValue Cond1 = N0.getOperand(1);
11389 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11390 if (VT != Cond0.getValueType())
11391 return SDValue();
11392
11393 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11394 // compare is inverted from that pattern ("Cond0 s> -1").
11395 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11396 ; // This is the pattern we are looking for.
11397 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11398 std::swap(N1, N2);
11399 else
11400 return SDValue();
11401
11402 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11403 if (isNullOrNullSplat(N2)) {
11404 SDLoc DL(N);
11405 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11406 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11407 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11408 }
11409
11410 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11411 if (isAllOnesOrAllOnesSplat(N1)) {
11412 SDLoc DL(N);
11413 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11414 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11415 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11416 }
11417
11418 // If we have to invert the sign bit mask, only do that transform if the
11419 // target has a bitwise 'and not' instruction (the invert is free).
11420 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11421 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11422 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11423 SDLoc DL(N);
11424 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11425 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11426 SDValue Not = DAG.getNOT(DL, Sra, VT);
11427 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11428 }
11429
11430 // TODO: There's another pattern in this family, but it may require
11431 // implementing hasOrNot() to check for profitability:
11432 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11433
11434 return SDValue();
11435}
11436
11437SDValue DAGCombiner::visitSELECT(SDNode *N) {
11438 SDValue N0 = N->getOperand(0);
11439 SDValue N1 = N->getOperand(1);
11440 SDValue N2 = N->getOperand(2);
11441 EVT VT = N->getValueType(0);
11442 EVT VT0 = N0.getValueType();
11443 SDLoc DL(N);
11444 SDNodeFlags Flags = N->getFlags();
11445
11446 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11447 return V;
11448
11449 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11450 return V;
11451
11452 // select (not Cond), N1, N2 -> select Cond, N2, N1
11453 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11454 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11455 SelectOp->setFlags(Flags);
11456 return SelectOp;
11457 }
11458
11459 if (SDValue V = foldSelectOfConstants(N))
11460 return V;
11461
11462 // If we can fold this based on the true/false value, do so.
11463 if (SimplifySelectOps(N, N1, N2))
11464 return SDValue(N, 0); // Don't revisit N.
11465
11466 if (VT0 == MVT::i1) {
11467 // The code in this block deals with the following 2 equivalences:
11468 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11469 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11470 // The target can specify its preferred form with the
11471 // shouldNormalizeToSelectSequence() callback. However we always transform
11472 // to the right anyway if we find the inner select exists in the DAG anyway
11473 // and we always transform to the left side if we know that we can further
11474 // optimize the combination of the conditions.
11475 bool normalizeToSequence =
11476 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11477 // select (and Cond0, Cond1), X, Y
11478 // -> select Cond0, (select Cond1, X, Y), Y
11479 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11480 SDValue Cond0 = N0->getOperand(0);
11481 SDValue Cond1 = N0->getOperand(1);
11482 SDValue InnerSelect =
11483 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11484 if (normalizeToSequence || !InnerSelect.use_empty())
11485 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11486 InnerSelect, N2, Flags);
11487 // Cleanup on failure.
11488 if (InnerSelect.use_empty())
11489 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11490 }
11491 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11492 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11493 SDValue Cond0 = N0->getOperand(0);
11494 SDValue Cond1 = N0->getOperand(1);
11495 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11496 Cond1, N1, N2, Flags);
11497 if (normalizeToSequence || !InnerSelect.use_empty())
11498 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11499 InnerSelect, Flags);
11500 // Cleanup on failure.
11501 if (InnerSelect.use_empty())
11502 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11503 }
11504
11505 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11506 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11507 SDValue N1_0 = N1->getOperand(0);
11508 SDValue N1_1 = N1->getOperand(1);
11509 SDValue N1_2 = N1->getOperand(2);
11510 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11511 // Create the actual and node if we can generate good code for it.
11512 if (!normalizeToSequence) {
11513 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11514 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11515 N2, Flags);
11516 }
11517 // Otherwise see if we can optimize the "and" to a better pattern.
11518 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11519 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11520 N2, Flags);
11521 }
11522 }
11523 }
11524 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11525 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11526 SDValue N2_0 = N2->getOperand(0);
11527 SDValue N2_1 = N2->getOperand(1);
11528 SDValue N2_2 = N2->getOperand(2);
11529 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11530 // Create the actual or node if we can generate good code for it.
11531 if (!normalizeToSequence) {
11532 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11533 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11534 N2_2, Flags);
11535 }
11536 // Otherwise see if we can optimize to a better pattern.
11537 if (SDValue Combined = visitORLike(N0, N2_0, N))
11538 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11539 N2_2, Flags);
11540 }
11541 }
11542 }
11543
11544 // Fold selects based on a setcc into other things, such as min/max/abs.
11545 if (N0.getOpcode() == ISD::SETCC) {
11546 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11547 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11548
11549 // select (fcmp lt x, y), x, y -> fminnum x, y
11550 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11551 //
11552 // This is OK if we don't care what happens if either operand is a NaN.
11553 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11554 if (SDValue FMinMax =
11555 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11556 return FMinMax;
11557
11558 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11559 // This is conservatively limited to pre-legal-operations to give targets
11560 // a chance to reverse the transform if they want to do that. Also, it is
11561 // unlikely that the pattern would be formed late, so it's probably not
11562 // worth going through the other checks.
11563 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11564 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11565 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11566 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11567 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11568 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11569 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11570 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11571 //
11572 // The IR equivalent of this transform would have this form:
11573 // %a = add %x, C
11574 // %c = icmp ugt %x, ~C
11575 // %r = select %c, -1, %a
11576 // =>
11577 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11578 // %u0 = extractvalue %u, 0
11579 // %u1 = extractvalue %u, 1
11580 // %r = select %u1, -1, %u0
11581 SDVTList VTs = DAG.getVTList(VT, VT0);
11582 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11583 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11584 }
11585 }
11586
11587 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11588 (!LegalOperations &&
11589 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11590 // Any flags available in a select/setcc fold will be on the setcc as they
11591 // migrated from fcmp
11592 Flags = N0->getFlags();
11593 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11594 N2, N0.getOperand(2));
11595 SelectNode->setFlags(Flags);
11596 return SelectNode;
11597 }
11598
11599 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11600 return NewSel;
11601 }
11602
11603 if (!VT.isVector())
11604 if (SDValue BinOp = foldSelectOfBinops(N))
11605 return BinOp;
11606
11607 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11608 return R;
11609
11610 return SDValue();
11611}
11612
11613// This function assumes all the vselect's arguments are CONCAT_VECTOR
11614// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11615 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11616 SDLoc DL(N);
11617 SDValue Cond = N->getOperand(0);
11618 SDValue LHS = N->getOperand(1);
11619 SDValue RHS = N->getOperand(2);
11620 EVT VT = N->getValueType(0);
11621 int NumElems = VT.getVectorNumElements();
11622 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11623 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11624 Cond.getOpcode() == ISD::BUILD_VECTOR);
11625
11626 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
11627 // binary ones here.
11628 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11629 return SDValue();
11630
11631 // We're sure we have an even number of elements due to the
11632 // concat_vectors we have as arguments to vselect.
11633 // Skip BV elements until we find one that's not an UNDEF
11634 // After we find an UNDEF element, keep looping until we get to half the
11635 // length of the BV and see if all the non-undef nodes are the same.
11636 ConstantSDNode *BottomHalf = nullptr;
11637 for (int i = 0; i < NumElems / 2; ++i) {
11638 if (Cond->getOperand(i)->isUndef())
11639 continue;
11640
11641 if (BottomHalf == nullptr)
11642 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11643 else if (Cond->getOperand(i).getNode() != BottomHalf)
11644 return SDValue();
11645 }
11646
11647 // Do the same for the second half of the BuildVector
11648 ConstantSDNode *TopHalf = nullptr;
11649 for (int i = NumElems / 2; i < NumElems; ++i) {
11650 if (Cond->getOperand(i)->isUndef())
11651 continue;
11652
11653 if (TopHalf == nullptr)
11654 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11655 else if (Cond->getOperand(i).getNode() != TopHalf)
11656 return SDValue();
11657 }
11658
11659 assert(TopHalf && BottomHalf &&
11660 "One half of the selector was all UNDEFs and the other was all the "
11661 "same value. This should have been addressed before this function.");
11662 return DAG.getNode(
11663 ISD::CONCAT_VECTORS, DL, VT,
11664 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11665 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11666}
11667
11668bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11669 SelectionDAG &DAG, const SDLoc &DL) {
11670
11671 // Only perform the transformation when existing operands can be reused.
11672 if (IndexIsScaled)
11673 return false;
11674
11675 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11676 return false;
11677
11678 EVT VT = BasePtr.getValueType();
11679
11680 if (SDValue SplatVal = DAG.getSplatValue(Index);
11681 SplatVal && !isNullConstant(SplatVal) &&
11682 SplatVal.getValueType() == VT) {
11683 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11684 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11685 return true;
11686 }
11687
11688 if (Index.getOpcode() != ISD::ADD)
11689 return false;
11690
11691 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11692 SplatVal && SplatVal.getValueType() == VT) {
11693 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11694 Index = Index.getOperand(1);
11695 return true;
11696 }
11697 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11698 SplatVal && SplatVal.getValueType() == VT) {
11699 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11700 Index = Index.getOperand(0);
11701 return true;
11702 }
11703 return false;
11704}
11705
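
// Illustrative sketch (hypothetical helpers, not part of LLVM): the addressing
// identity behind refineUniformBase above. With an unscaled index, splitting a
// uniform splat out of the index and folding it into the base pointer visits
// the same addresses.
#include <cstddef>

static char *sketchAddrSplatInIndex(char *Base, ptrdiff_t Splat, ptrdiff_t Idx) {
  return Base + (Splat + Idx); // index = splat + idx, base unchanged
}

static char *sketchAddrSplatInBase(char *Base, ptrdiff_t Splat, ptrdiff_t Idx) {
  return (Base + Splat) + Idx; // base = base + splat, index = idx
}
// Both functions return the same pointer for all inputs.
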
11706// Fold sext/zext of index into index type.
11707 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11708 SelectionDAG &DAG) {
11709 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11710
11711 // It's always safe to look through zero extends.
11712 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11713 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11714 IndexType = ISD::UNSIGNED_SCALED;
11715 Index = Index.getOperand(0);
11716 return true;
11717 }
11718 if (ISD::isIndexTypeSigned(IndexType)) {
11719 IndexType = ISD::UNSIGNED_SCALED;
11720 return true;
11721 }
11722 }
11723
11724 // It's only safe to look through sign extends when Index is signed.
11725 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11726 ISD::isIndexTypeSigned(IndexType) &&
11727 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11728 Index = Index.getOperand(0);
11729 return true;
11730 }
11731
11732 return false;
11733}
11734
11735SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11736 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11737 SDValue Mask = MSC->getMask();
11738 SDValue Chain = MSC->getChain();
11739 SDValue Index = MSC->getIndex();
11740 SDValue Scale = MSC->getScale();
11741 SDValue StoreVal = MSC->getValue();
11742 SDValue BasePtr = MSC->getBasePtr();
11743 SDValue VL = MSC->getVectorLength();
11744 ISD::MemIndexType IndexType = MSC->getIndexType();
11745 SDLoc DL(N);
11746
11747 // Zap scatters with a zero mask.
11748 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11749 return Chain;
11750
11751 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11752 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11753 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11754 DL, Ops, MSC->getMemOperand(), IndexType);
11755 }
11756
11757 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11758 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11759 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11760 DL, Ops, MSC->getMemOperand(), IndexType);
11761 }
11762
11763 return SDValue();
11764}
11765
11766SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11767 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11768 SDValue Mask = MSC->getMask();
11769 SDValue Chain = MSC->getChain();
11770 SDValue Index = MSC->getIndex();
11771 SDValue Scale = MSC->getScale();
11772 SDValue StoreVal = MSC->getValue();
11773 SDValue BasePtr = MSC->getBasePtr();
11774 ISD::MemIndexType IndexType = MSC->getIndexType();
11775 SDLoc DL(N);
11776
11777 // Zap scatters with a zero mask.
11778 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11779 return Chain;
11780
11781 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11782 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11783 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11784 DL, Ops, MSC->getMemOperand(), IndexType,
11785 MSC->isTruncatingStore());
11786 }
11787
11788 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11789 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11790 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11791 DL, Ops, MSC->getMemOperand(), IndexType,
11792 MSC->isTruncatingStore());
11793 }
11794
11795 return SDValue();
11796}
11797
11798SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11799 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11800 SDValue Mask = MST->getMask();
11801 SDValue Chain = MST->getChain();
11802 SDValue Value = MST->getValue();
11803 SDValue Ptr = MST->getBasePtr();
11804 SDLoc DL(N);
11805
11806 // Zap masked stores with a zero mask.
11807 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11808 return Chain;
11809
11810 // Remove a masked store if base pointers and masks are equal.
11811 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11812 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11813 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11814 !MST->getBasePtr().isUndef() &&
11815 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11816 MST1->getMemoryVT().getStoreSize()) ||
11817 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11818 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11819 MST->getMemoryVT().getStoreSize())) {
11820 CombineTo(MST1, MST1->getChain());
11821 if (N->getOpcode() != ISD::DELETED_NODE)
11822 AddToWorklist(N);
11823 return SDValue(N, 0);
11824 }
11825 }
11826
11827 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11828 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11829 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11830 !MST->isCompressingStore() && !MST->isTruncatingStore())
11831 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11832 MST->getBasePtr(), MST->getPointerInfo(),
11833 MST->getOriginalAlign(),
11834 MST->getMemOperand()->getFlags(), MST->getAAInfo());
11835
11836 // Try transforming N to an indexed store.
11837 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11838 return SDValue(N, 0);
11839
11840 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11841 Value.getValueType().isInteger() &&
11842 (!isa<ConstantSDNode>(Value) ||
11843 !cast<ConstantSDNode>(Value)->isOpaque())) {
11844 APInt TruncDemandedBits =
11845 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11846 MST->getMemoryVT().getScalarSizeInBits());
11847
11848 // See if we can simplify the operation with
11849 // SimplifyDemandedBits, which only works if the value has a single use.
11850 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11851 // Re-visit the store if anything changed and the store hasn't been merged
11852 // with another node (N is deleted). SimplifyDemandedBits will add Value's
11853 // node back to the worklist if necessary, but we also need to re-visit
11854 // the Store node itself.
11855 if (N->getOpcode() != ISD::DELETED_NODE)
11856 AddToWorklist(N);
11857 return SDValue(N, 0);
11858 }
11859 }
11860
11861 // If this is a TRUNC followed by a masked store, fold this into a masked
11862 // truncating store. We can do this even if this is already a masked
11863 // truncstore.
11864 // TODO: Try combining to a masked compress store if possible.
11865 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11866 MST->isUnindexed() && !MST->isCompressingStore() &&
11867 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11868 MST->getMemoryVT(), LegalOperations)) {
11869 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11870 Value.getOperand(0).getValueType());
11871 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11872 MST->getOffset(), Mask, MST->getMemoryVT(),
11873 MST->getMemOperand(), MST->getAddressingMode(),
11874 /*IsTruncating=*/true);
11875 }
11876
11877 return SDValue();
11878}
11879
11880SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11881 auto *SST = cast<VPStridedStoreSDNode>(N);
11882 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11883 // Combine strided stores with unit-stride to a regular VP store.
11884 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11885 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11886 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11887 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11888 SST->getVectorLength(), SST->getMemoryVT(),
11889 SST->getMemOperand(), SST->getAddressingMode(),
11890 SST->isTruncatingStore(), SST->isCompressingStore());
11891 }
11892 return SDValue();
11893}
11894
11895SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11896 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11897 SDValue Mask = MGT->getMask();
11898 SDValue Chain = MGT->getChain();
11899 SDValue Index = MGT->getIndex();
11900 SDValue Scale = MGT->getScale();
11901 SDValue BasePtr = MGT->getBasePtr();
11902 SDValue VL = MGT->getVectorLength();
11903 ISD::MemIndexType IndexType = MGT->getIndexType();
11904 SDLoc DL(N);
11905
11906 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11907 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11908 return DAG.getGatherVP(
11909 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11910 Ops, MGT->getMemOperand(), IndexType);
11911 }
11912
11913 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11914 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11915 return DAG.getGatherVP(
11916 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11917 Ops, MGT->getMemOperand(), IndexType);
11918 }
11919
11920 return SDValue();
11921}
11922
11923SDValue DAGCombiner::visitMGATHER(SDNode *N) {
11924 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
11925 SDValue Mask = MGT->getMask();
11926 SDValue Chain = MGT->getChain();
11927 SDValue Index = MGT->getIndex();
11928 SDValue Scale = MGT->getScale();
11929 SDValue PassThru = MGT->getPassThru();
11930 SDValue BasePtr = MGT->getBasePtr();
11931 ISD::MemIndexType IndexType = MGT->getIndexType();
11932 SDLoc DL(N);
11933
11934 // Zap gathers with a zero mask.
11935 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11936 return CombineTo(N, PassThru, MGT->getChain());
11937
11938 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11939 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11940 return DAG.getMaskedGather(
11941 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11942 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11943 }
11944
11945 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11946 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
11947 return DAG.getMaskedGather(
11948 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11949 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
11950 }
11951
11952 return SDValue();
11953}
11954
11955SDValue DAGCombiner::visitMLOAD(SDNode *N) {
11956 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
11957 SDValue Mask = MLD->getMask();
11958 SDLoc DL(N);
11959
11960 // Zap masked loads with a zero mask.
11961 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11962 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
11963
11964 // If this is a masked load with an all-ones mask, we can use an unmasked load.
11965 // FIXME: Can we do this for indexed, expanding, or extending loads?
11966 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
11967 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
11968 SDValue NewLd = DAG.getLoad(
11969 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
11970 MLD->getPointerInfo(), MLD->getOriginalAlign(),
11971 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
11972 return CombineTo(N, NewLd, NewLd.getValue(1));
11973 }
11974
11975 // Try transforming N to an indexed load.
11976 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11977 return SDValue(N, 0);
11978
11979 return SDValue();
11980}
11981
11982SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
11983 auto *SLD = cast<VPStridedLoadSDNode>(N);
11984 EVT EltVT = SLD->getValueType(0).getVectorElementType();
11985 // Combine strided loads with unit-stride to a regular VP load.
11986 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
11987 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11988 SDValue NewLd = DAG.getLoadVP(
11989 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
11990 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
11991 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
11992 SLD->getMemOperand(), SLD->isExpandingLoad());
11993 return CombineTo(N, NewLd, NewLd.getValue(1));
11994 }
11995 return SDValue();
11996}
11997
11998/// A vector select of 2 constant vectors can be simplified to math/logic to
11999/// avoid a variable select instruction and possibly avoid constant loads.
12000SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12001 SDValue Cond = N->getOperand(0);
12002 SDValue N1 = N->getOperand(1);
12003 SDValue N2 = N->getOperand(2);
12004 EVT VT = N->getValueType(0);
12005 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12006 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12007 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12008 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12009 return SDValue();
12010
12011 // Check if we can use the condition value to increment/decrement a single
12012 // constant value. This simplifies a select to an add and removes a constant
12013 // load/materialization from the general case.
12014 bool AllAddOne = true;
12015 bool AllSubOne = true;
12016 unsigned Elts = VT.getVectorNumElements();
12017 for (unsigned i = 0; i != Elts; ++i) {
12018 SDValue N1Elt = N1.getOperand(i);
12019 SDValue N2Elt = N2.getOperand(i);
12020 if (N1Elt.isUndef() || N2Elt.isUndef())
12021 continue;
12022 if (N1Elt.getValueType() != N2Elt.getValueType())
12023 continue;
12024
12025 const APInt &C1 = N1Elt->getAsAPIntVal();
12026 const APInt &C2 = N2Elt->getAsAPIntVal();
12027 if (C1 != C2 + 1)
12028 AllAddOne = false;
12029 if (C1 != C2 - 1)
12030 AllSubOne = false;
12031 }
12032
12033 // Further simplifications for the extra-special cases where the constants are
12034 // all 0 or all -1 should be implemented as folds of these patterns.
12035 SDLoc DL(N);
12036 if (AllAddOne || AllSubOne) {
12037 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12038 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12039 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12040 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12041 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12042 }
12043
12044 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12045 APInt Pow2C;
12046 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12047 isNullOrNullSplat(N2)) {
12048 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12049 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12050 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12051 }
12052
12053 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
12054 return V;
12055
12056 // The general case for select-of-constants:
12057 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12058 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12059 // leave that to a machine-specific pass.
12060 return SDValue();
12061}
12062
12063SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12064 SDValue N0 = N->getOperand(0);
12065 SDValue N1 = N->getOperand(1);
12066 SDValue N2 = N->getOperand(2);
12067
12068 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12069 return V;
12070
12071 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12072 return V;
12073
12074 return SDValue();
12075}
12076
12077SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12078 SDValue N0 = N->getOperand(0);
12079 SDValue N1 = N->getOperand(1);
12080 SDValue N2 = N->getOperand(2);
12081 EVT VT = N->getValueType(0);
12082 SDLoc DL(N);
12083
12084 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12085 return V;
12086
12087 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12088 return V;
12089
12090 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12091 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12092 return DAG.getSelect(DL, VT, F, N2, N1);
12093
12094 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12095 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12098 TLI.getBooleanContents(N0.getValueType()) ==
12099 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12100 return DAG.getNode(
12101 ISD::ADD, DL, N1.getValueType(), N2,
12102 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12103 }
12104
12105 // Canonicalize integer abs.
12106 // vselect (setg[te] X, 0), X, -X ->
12107 // vselect (setgt X, -1), X, -X ->
12108 // vselect (setl[te] X, 0), -X, X ->
12109 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12110 if (N0.getOpcode() == ISD::SETCC) {
12111 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12112 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12113 bool isAbs = false;
12114 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12115
12116 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12117 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12118 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12119 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12120 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12121 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12122 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12123
12124 if (isAbs) {
12125 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12126 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12127
12128 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12129 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12130 DL, getShiftAmountTy(VT)));
12131 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12132 AddToWorklist(Shift.getNode());
12133 AddToWorklist(Add.getNode());
12134 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12135 }
12136
12137 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12138 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12139 //
12140 // This is OK if we don't care about what happens if either operand is a
12141 // NaN.
12142 //
12143 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12144 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12145 return FMinMax;
12146 }
12147
12148 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12149 return S;
12150 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12151 return S;
12152
12153 // If this select has a condition (setcc) with narrower operands than the
12154 // select, try to widen the compare to match the select width.
12155 // TODO: This should be extended to handle any constant.
12156 // TODO: This could be extended to handle non-loading patterns, but that
12157 // requires thorough testing to avoid regressions.
12158 if (isNullOrNullSplat(RHS)) {
12159 EVT NarrowVT = LHS.getValueType();
12160 EVT WideVT = N->getValueType(0).changeVectorElementTypeToInteger();
12161 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12162 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12163 unsigned WideWidth = WideVT.getScalarSizeInBits();
12164 bool IsSigned = isSignedIntSetCC(CC);
12165 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12166 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12167 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12168 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12169 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12170 // Both compare operands can be widened for free. The LHS can use an
12171 // extended load, and the RHS is a constant:
12172 // vselect (ext (setcc load(X), C)), N1, N2 -->
12173 // vselect (setcc extload(X), C'), N1, N2
12174 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12175 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12176 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12177 EVT WideSetCCVT = getSetCCResultType(WideVT);
12178 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12179 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12180 }
12181 }
12182
12183 // Match VSELECTs with absolute difference patterns.
12184 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12185 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12186 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12187 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12188 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12189 N1.getOperand(0) == N2.getOperand(1) &&
12190 N1.getOperand(1) == N2.getOperand(0)) {
12191 bool IsSigned = isSignedIntSetCC(CC);
12192 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12193 if (hasOperation(ABDOpc, VT)) {
12194 switch (CC) {
12195 case ISD::SETGT:
12196 case ISD::SETGE:
12197 case ISD::SETUGT:
12198 case ISD::SETUGE:
12199 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12200 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12201 break;
12202 case ISD::SETLT:
12203 case ISD::SETLE:
12204 case ISD::SETULT:
12205 case ISD::SETULE:
12206 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1) )
12207 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12208 break;
12209 default:
12210 break;
12211 }
12212 }
12213 }
12214
12215 // Match VSELECTs into add with unsigned saturation.
12216 if (hasOperation(ISD::UADDSAT, VT)) {
12217 // Check if one of the arms of the VSELECT is vector with all bits set.
12218 // If it's on the left side invert the predicate to simplify logic below.
12219 SDValue Other;
12220 ISD::CondCode SatCC = CC;
12221 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12222 Other = N2;
12223 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12224 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12225 Other = N1;
12226 }
12227
12228 if (Other && Other.getOpcode() == ISD::ADD) {
12229 SDValue CondLHS = LHS, CondRHS = RHS;
12230 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12231
12232 // Canonicalize condition operands.
12233 if (SatCC == ISD::SETUGE) {
12234 std::swap(CondLHS, CondRHS);
12235 SatCC = ISD::SETULE;
12236 }
12237
12238 // We can test against either of the addition operands.
12239 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12240 // x+y >= x ? x+y : ~0 --> uaddsat x, y
12241 if (SatCC == ISD::SETULE && Other == CondRHS &&
12242 (OpLHS == CondLHS || OpRHS == CondLHS))
12243 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12244
12245 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12246 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12247 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12248 CondLHS == OpLHS) {
12249 // If the RHS is a constant we have to reverse the const
12250 // canonicalization.
12251 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12252 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12253 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12254 };
12255 if (SatCC == ISD::SETULE &&
12256 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12257 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12258 }
12259 }
12260 }
12261
12262 // Match VSELECTs into sub with unsigned saturation.
12263 if (hasOperation(ISD::USUBSAT, VT)) {
12264 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12265 // the left side invert the predicate to simplify logic below.
12266 SDValue Other;
12267 ISD::CondCode SatCC = CC;
12268 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12269 Other = N2;
12270 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12271 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12272 Other = N1;
12273 }
12274
12275 // zext(x) >= y ? trunc(zext(x) - y) : 0
12276 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12277 // zext(x) > y ? trunc(zext(x) - y) : 0
12278 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12279 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12280 Other.getOperand(0).getOpcode() == ISD::SUB &&
12281 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12282 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12283 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12284 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12285 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12286 DAG, DL))
12287 return R;
12288 }
12289
12290 if (Other && Other.getNumOperands() == 2) {
12291 SDValue CondRHS = RHS;
12292 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12293
12294 if (OpLHS == LHS) {
12295 // Look for a general sub with unsigned saturation first.
12296 // x >= y ? x-y : 0 --> usubsat x, y
12297 // x > y ? x-y : 0 --> usubsat x, y
12298 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12299 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12300 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12301
12302 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12303 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12304 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12305 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12306 // If the RHS is a constant we have to reverse the const
12307 // canonicalization.
12308 // x > C-1 ? x+-C : 0 --> usubsat x, C
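// For example, for i8 and C == 64: (x u> 63 ? x + (-64) : 0) is usubsat(x, 64),
// since x + (-64) equals x - 64 whenever x >= 64, and the select already forces
// the result to 0 for x < 64.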
12309 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12310 return (!Op && !Cond) ||
12311 (Op && Cond &&
12312 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12313 };
12314 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12315 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12316 /*AllowUndefs*/ true)) {
12317 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12318 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12319 }
12320
12321 // Another special case: If C was a sign bit, the sub has been
12322 // canonicalized into a xor.
12323 // FIXME: Would it be better to use computeKnownBits to
12324 // determine whether it's safe to decanonicalize the xor?
12325 // x s< 0 ? x^C : 0 --> usubsat x, C
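// For example, for i8 and C == 0x80: when x is negative, x ^ 0x80 clears the
// sign bit and equals x - 0x80; when x is non-negative, usubsat(x, 0x80)
// saturates to 0, matching the select's false arm.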
12326 APInt SplatValue;
12327 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12328 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12330 SplatValue.isSignMask()) {
12331 // Note that we have to rebuild the RHS constant here to
12332 // ensure we don't rely on particular values of undef lanes.
12333 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12334 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12335 }
12336 }
12337 }
12338 }
12339 }
12340 }
12341 }
12342
12343 if (SimplifySelectOps(N, N1, N2))
12344 return SDValue(N, 0); // Don't revisit N.
12345
12346 // Fold (vselect all_ones, N1, N2) -> N1
12347 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12348 return N1;
12349 // Fold (vselect all_zeros, N1, N2) -> N2
12350 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12351 return N2;
12352
12353 // The ConvertSelectToConcatVector function assumes both the above
12354 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12355 // and addressed.
12356 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12359 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12360 return CV;
12361 }
12362
12363 if (SDValue V = foldVSelectOfConstants(N))
12364 return V;
12365
12366 if (hasOperation(ISD::SRA, VT))
12368 return V;
12369
12371 return SDValue(N, 0);
12372
12373 return SDValue();
12374}
12375
12376SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12377 SDValue N0 = N->getOperand(0);
12378 SDValue N1 = N->getOperand(1);
12379 SDValue N2 = N->getOperand(2);
12380 SDValue N3 = N->getOperand(3);
12381 SDValue N4 = N->getOperand(4);
12382 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12383
12384 // fold select_cc lhs, rhs, x, x, cc -> x
12385 if (N2 == N3)
12386 return N2;
12387
12388 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12389 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12390 isNullConstant(N1))
12391 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12392
12393 // Determine if the condition we're dealing with is constant
12394 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12395 CC, SDLoc(N), false)) {
12396 AddToWorklist(SCC.getNode());
12397
12398 // cond always true -> true val
12399 // cond always false -> false val
12400 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12401 return SCCC->isZero() ? N3 : N2;
12402
12403 // When the condition is UNDEF, just return the first operand. This is
12404 // coherent with DAG creation: no setcc node is created in this case.
12405 if (SCC->isUndef())
12406 return N2;
12407
12408 // Fold to a simpler select_cc
12409 if (SCC.getOpcode() == ISD::SETCC) {
12410 SDValue SelectOp = DAG.getNode(
12411 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12412 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12413 SelectOp->setFlags(SCC->getFlags());
12414 return SelectOp;
12415 }
12416 }
12417
12418 // If we can fold this based on the true/false value, do so.
12419 if (SimplifySelectOps(N, N2, N3))
12420 return SDValue(N, 0); // Don't revisit N.
12421
12422 // fold select_cc into other things, such as min/max/abs
12423 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12424}
12425
12426SDValue DAGCombiner::visitSETCC(SDNode *N) {
12427 // setcc is very commonly used as an argument to brcond. This pattern
12428 // also lends itself to numerous combines and, as a result, it is desirable to
12429 // keep the argument to a brcond as a setcc as much as possible.
12430 bool PreferSetCC =
12431 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12432
12433 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12434 EVT VT = N->getValueType(0);
12435 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12436
12437 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12438
12439 if (Combined) {
12440 // If we prefer to have a setcc, and we don't, we'll try our best to
12441 // recreate one using rebuildSetCC.
12442 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12443 SDValue NewSetCC = rebuildSetCC(Combined);
12444
12445 // We don't have anything interesting to combine to.
12446 if (NewSetCC.getNode() == N)
12447 return SDValue();
12448
12449 if (NewSetCC)
12450 return NewSetCC;
12451 }
12452 return Combined;
12453 }
12454
12455 // Optimize
12456 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12457 // or
12458 // 2) (icmp eq/ne X, (rotate X, C1))
12459 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12460 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12461 // Then:
12462 // If C1 is a power of 2, then the rotate and shift+and versions are
12463 // equivalent, so we can interchange them depending on target preference.
12464 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12465 // which in turn affects the constant C0. We can use this to get better
12466 // constants, again determined by target preference.
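// For example, with a 64-bit X and C1 == 32 (a power of 2), the forms
// (X & 0xFFFFFFFF) == (X >> 32), (X << 32) == (X & 0xFFFFFFFF00000000) and
// X == (rotl X, 32) all test whether the low and high halves of X are equal,
// so we may pick whichever one the target prefers.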
12467 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12468 auto IsAndWithShift = [](SDValue A, SDValue B) {
12469 return A.getOpcode() == ISD::AND &&
12470 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12471 A.getOperand(0) == B.getOperand(0);
12472 };
12473 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12474 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12475 B.getOperand(0) == A;
12476 };
12477 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12478 bool IsRotate = false;
12479
12480 // Find either shift+and or rotate pattern.
12481 if (IsAndWithShift(N0, N1)) {
12482 AndOrOp = N0;
12483 ShiftOrRotate = N1;
12484 } else if (IsAndWithShift(N1, N0)) {
12485 AndOrOp = N1;
12486 ShiftOrRotate = N0;
12487 } else if (IsRotateWithOp(N0, N1)) {
12488 IsRotate = true;
12489 AndOrOp = N0;
12490 ShiftOrRotate = N1;
12491 } else if (IsRotateWithOp(N1, N0)) {
12492 IsRotate = true;
12493 AndOrOp = N1;
12494 ShiftOrRotate = N0;
12495 }
12496
12497 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12498 (IsRotate || AndOrOp.hasOneUse())) {
12499 EVT OpVT = N0.getValueType();
12500 // Get the constant shift/rotate amount and possibly the mask (if it's the
12501 // shift+and variant).
12502 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12503 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12504 /*AllowTrunc*/ false);
12505 if (CNode == nullptr)
12506 return std::nullopt;
12507 return CNode->getAPIntValue();
12508 };
12509 std::optional<APInt> AndCMask =
12510 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12511 std::optional<APInt> ShiftCAmt =
12512 GetAPIntValue(ShiftOrRotate.getOperand(1));
12513 unsigned NumBits = OpVT.getScalarSizeInBits();
12514
12515 // We found constants.
12516 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12517 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12518 // Check that the constants meet the constraints.
12519 bool CanTransform = IsRotate;
12520 if (!CanTransform) {
12521 // Check that the mask and shift complement each other
12522 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12523 // Check that we are comparing all bits
12524 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12525 // Check that the and mask is correct for the shift
12526 CanTransform &=
12527 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12528 }
12529
12530 // See if target prefers another shift/rotate opcode.
12531 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12532 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12533 // Transform is valid and we have a new preference.
12534 if (CanTransform && NewShiftOpc != ShiftOpc) {
12535 SDLoc DL(N);
12536 SDValue NewShiftOrRotate =
12537 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12538 ShiftOrRotate.getOperand(1));
12539 SDValue NewAndOrOp = SDValue();
12540
12541 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12542 APInt NewMask =
12543 NewShiftOpc == ISD::SHL
12544 ? APInt::getHighBitsSet(NumBits,
12545 NumBits - ShiftCAmt->getZExtValue())
12546 : APInt::getLowBitsSet(NumBits,
12547 NumBits - ShiftCAmt->getZExtValue());
12548 NewAndOrOp =
12549 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12550 DAG.getConstant(NewMask, DL, OpVT));
12551 } else {
12552 NewAndOrOp = ShiftOrRotate.getOperand(0);
12553 }
12554
12555 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12556 }
12557 }
12558 }
12559 }
12560 return SDValue();
12561}
12562
12563SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12564 SDValue LHS = N->getOperand(0);
12565 SDValue RHS = N->getOperand(1);
12566 SDValue Carry = N->getOperand(2);
12567 SDValue Cond = N->getOperand(3);
12568
12569 // If Carry is false, fold to a regular SETCC.
12570 if (isNullConstant(Carry))
12571 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12572
12573 return SDValue();
12574}
12575
12576/// Check if N satisfies:
12577/// N is used once.
12578/// N is a Load.
12579/// The load is compatible with ExtOpcode, meaning:
12580/// if the load has explicit zero/sign extension, ExtOpcode must have the same
12581/// extension;
12582/// otherwise the load is compatible with any extension opcode.
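/// For example, a plain (non-extending) load is compatible with any ExtOpcode,
/// while a sextload is only compatible with SIGN_EXTEND and a zextload only
/// with ZERO_EXTEND.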
12583static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12584 if (!N.hasOneUse())
12585 return false;
12586
12587 if (!isa<LoadSDNode>(N))
12588 return false;
12589
12590 LoadSDNode *Load = cast<LoadSDNode>(N);
12591 ISD::LoadExtType LoadExt = Load->getExtensionType();
12592 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12593 return true;
12594
12595 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12596 // extension.
12597 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12598 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12599 return false;
12600
12601 return true;
12602}
12603
12604/// Fold
12605/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12606/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12607/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12608/// This function is called by the DAGCombiner when visiting sext/zext/aext
12609/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
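/// For example, on a target with a legal i16->i32 sextload:
///   (i32 (sext (select c, (i16 load p), (i16 load q))))
///     --> (select c, (i32 sextload p), (i32 sextload q))
/// so the extension is folded into both loads instead of remaining a separate
/// node.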
12610static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12611 SelectionDAG &DAG,
12612 CombineLevel Level) {
12613 unsigned Opcode = N->getOpcode();
12614 SDValue N0 = N->getOperand(0);
12615 EVT VT = N->getValueType(0);
12616 SDLoc DL(N);
12617
12618 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12619 Opcode == ISD::ANY_EXTEND) &&
12620 "Expected EXTEND dag node in input!");
12621
12622 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12623 !N0.hasOneUse())
12624 return SDValue();
12625
12626 SDValue Op1 = N0->getOperand(1);
12627 SDValue Op2 = N0->getOperand(2);
12628 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12629 return SDValue();
12630
12631 auto ExtLoadOpcode = ISD::EXTLOAD;
12632 if (Opcode == ISD::SIGN_EXTEND)
12633 ExtLoadOpcode = ISD::SEXTLOAD;
12634 else if (Opcode == ISD::ZERO_EXTEND)
12635 ExtLoadOpcode = ISD::ZEXTLOAD;
12636
12637 // An illegal VSELECT may fail instruction selection if it occurs after
12638 // legalization (DAG Combine2), so we conservatively check the OperationAction.
12639 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12640 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12641 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12642 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12643 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12645 return SDValue();
12646
12647 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12648 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12649 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12650}
12651
12652/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12653/// a build_vector of constants.
12654/// This function is called by the DAGCombiner when visiting sext/zext/aext
12655/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12656/// Vector extends are not folded if operations are legal; this is to
12657/// avoid introducing illegal build_vector dag nodes.
12658static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
12659 const TargetLowering &TLI,
12660 SelectionDAG &DAG, bool LegalTypes) {
12661 unsigned Opcode = N->getOpcode();
12662 SDValue N0 = N->getOperand(0);
12663 EVT VT = N->getValueType(0);
12664
12665 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12666 "Expected EXTEND dag node in input!");
12667
12668 // fold (sext c1) -> c1
12669 // fold (zext c1) -> c1
12670 // fold (aext c1) -> c1
12671 if (isa<ConstantSDNode>(N0))
12672 return DAG.getNode(Opcode, DL, VT, N0);
12673
12674 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12675 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12676 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12677 if (N0->getOpcode() == ISD::SELECT) {
12678 SDValue Op1 = N0->getOperand(1);
12679 SDValue Op2 = N0->getOperand(2);
12680 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12681 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12682 // For any_extend, choose sign extension of the constants to allow a
12683 // possible further transform to sign_extend_inreg, i.e.:
12684 //
12685 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12686 // t2: i64 = any_extend t1
12687 // -->
12688 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12689 // -->
12690 // t4: i64 = sign_extend_inreg t3
12691 unsigned FoldOpc = Opcode;
12692 if (FoldOpc == ISD::ANY_EXTEND)
12693 FoldOpc = ISD::SIGN_EXTEND;
12694 return DAG.getSelect(DL, VT, N0->getOperand(0),
12695 DAG.getNode(FoldOpc, DL, VT, Op1),
12696 DAG.getNode(FoldOpc, DL, VT, Op2));
12697 }
12698 }
12699
12700 // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
12701 // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
12702 // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
12703 EVT SVT = VT.getScalarType();
12704 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12706 return SDValue();
12707
12708 // We can fold this node into a build_vector.
12709 unsigned VTBits = SVT.getSizeInBits();
12710 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12712 unsigned NumElts = VT.getVectorNumElements();
12713
12714 for (unsigned i = 0; i != NumElts; ++i) {
12715 SDValue Op = N0.getOperand(i);
12716 if (Op.isUndef()) {
12717 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12718 Elts.push_back(DAG.getUNDEF(SVT));
12719 else
12720 Elts.push_back(DAG.getConstant(0, DL, SVT));
12721 continue;
12722 }
12723
12724 SDLoc DL(Op);
12725 // Get the constant value and if needed trunc it to the size of the type.
12726 // Nodes like build_vector might have constants wider than the scalar type.
12727 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12728 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12729 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12730 else
12731 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12732 }
12733
12734 return DAG.getBuildVector(VT, DL, Elts);
12735}
12736
12737// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
12738// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12739// transformation. Returns true if the extensions are possible and the above-
12740// mentioned transformation is profitable.
12741static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12742 unsigned ExtOpc,
12743 SmallVectorImpl<SDNode *> &ExtendNodes,
12744 const TargetLowering &TLI) {
12745 bool HasCopyToRegUses = false;
12746 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12747 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12748 ++UI) {
12749 SDNode *User = *UI;
12750 if (User == N)
12751 continue;
12752 if (UI.getUse().getResNo() != N0.getResNo())
12753 continue;
12754 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12755 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12756 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12757 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12758 // Sign bits will be lost after a zext.
12759 return false;
12760 bool Add = false;
12761 for (unsigned i = 0; i != 2; ++i) {
12762 SDValue UseOp = User->getOperand(i);
12763 if (UseOp == N0)
12764 continue;
12765 if (!isa<ConstantSDNode>(UseOp))
12766 return false;
12767 Add = true;
12768 }
12769 if (Add)
12770 ExtendNodes.push_back(User);
12771 continue;
12772 }
12773 // If truncates aren't free and there are users we can't
12774 // extend, it isn't worthwhile.
12775 if (!isTruncFree)
12776 return false;
12777 // Remember if this value is live-out.
12778 if (User->getOpcode() == ISD::CopyToReg)
12779 HasCopyToRegUses = true;
12780 }
12781
12782 if (HasCopyToRegUses) {
12783 bool BothLiveOut = false;
12784 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12785 UI != UE; ++UI) {
12786 SDUse &Use = UI.getUse();
12787 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12788 BothLiveOut = true;
12789 break;
12790 }
12791 }
12792 if (BothLiveOut)
12793 // Both unextended and extended values are live out. There had better be
12794 // a good reason for the transformation.
12795 return !ExtendNodes.empty();
12796 }
12797 return true;
12798}
12799
12800void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12801 SDValue OrigLoad, SDValue ExtLoad,
12802 ISD::NodeType ExtType) {
12803 // Extend SetCC uses if necessary.
12804 SDLoc DL(ExtLoad);
12805 for (SDNode *SetCC : SetCCs) {
12807
12808 for (unsigned j = 0; j != 2; ++j) {
12809 SDValue SOp = SetCC->getOperand(j);
12810 if (SOp == OrigLoad)
12811 Ops.push_back(ExtLoad);
12812 else
12813 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12814 }
12815
12816 Ops.push_back(SetCC->getOperand(2));
12817 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12818 }
12819}
12820
12821// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12822SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12823 SDValue N0 = N->getOperand(0);
12824 EVT DstVT = N->getValueType(0);
12825 EVT SrcVT = N0.getValueType();
12826
12827 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12828 N->getOpcode() == ISD::ZERO_EXTEND) &&
12829 "Unexpected node type (not an extend)!");
12830
12831 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12832 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12833 // (v8i32 (sext (v8i16 (load x))))
12834 // into:
12835 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12836 // (v4i32 (sextload (x + 16)))))
12837 // Where uses of the original load, i.e.:
12838 // (v8i16 (load x))
12839 // are replaced with:
12840 // (v8i16 (truncate
12841 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12842 // (v4i32 (sextload (x + 16)))))))
12843 //
12844 // This combine is only applicable to illegal, but splittable, vectors.
12845 // All legal types, and illegal non-vector types, are handled elsewhere.
12846 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12847 //
12848 if (N0->getOpcode() != ISD::LOAD)
12849 return SDValue();
12850
12851 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12852
12853 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12854 !N0.hasOneUse() || !LN0->isSimple() ||
12855 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12857 return SDValue();
12858
12860 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12861 return SDValue();
12862
12863 ISD::LoadExtType ExtType =
12864 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12865
12866 // Try to split the vector types to get down to legal types.
12867 EVT SplitSrcVT = SrcVT;
12868 EVT SplitDstVT = DstVT;
12869 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12870 SplitSrcVT.getVectorNumElements() > 1) {
12871 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12872 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12873 }
12874
12875 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12876 return SDValue();
12877
12878 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12879
12880 SDLoc DL(N);
12881 const unsigned NumSplits =
12882 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12883 const unsigned Stride = SplitSrcVT.getStoreSize();
12886
12887 SDValue BasePtr = LN0->getBasePtr();
12888 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12889 const unsigned Offset = Idx * Stride;
12890
12891 SDValue SplitLoad =
12892 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
12893 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
12894 SplitSrcVT, LN0->getOriginalAlign(),
12895 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12896
12897 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12898
12899 Loads.push_back(SplitLoad.getValue(0));
12900 Chains.push_back(SplitLoad.getValue(1));
12901 }
12902
12903 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12904 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12905
12906 // Simplify TF.
12907 AddToWorklist(NewChain.getNode());
12908
12909 CombineTo(N, NewValue);
12910
12911 // Replace uses of the original load (before extension)
12912 // with a truncate of the concatenated sextloaded vectors.
12913 SDValue Trunc =
12914 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12915 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12916 CombineTo(N0.getNode(), Trunc, NewChain);
12917 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12918}
12919
12920// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12921// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
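// For example: (i64 (zext (and (srl (i32 (load p)), 8), 0xFF))) can become
// (i64 (and (srl (i64 (zextload p)), 8), 0xFF)); the extension is folded into
// the load and both constants are simply zero-extended.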
12922SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12923 assert(N->getOpcode() == ISD::ZERO_EXTEND);
12924 EVT VT = N->getValueType(0);
12925 EVT OrigVT = N->getOperand(0).getValueType();
12926 if (TLI.isZExtFree(OrigVT, VT))
12927 return SDValue();
12928
12929 // and/or/xor
12930 SDValue N0 = N->getOperand(0);
12931 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12932 N0.getOperand(1).getOpcode() != ISD::Constant ||
12933 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12934 return SDValue();
12935
12936 // shl/shr
12937 SDValue N1 = N0->getOperand(0);
12938 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12939 N1.getOperand(1).getOpcode() != ISD::Constant ||
12940 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12941 return SDValue();
12942
12943 // load
12944 if (!isa<LoadSDNode>(N1.getOperand(0)))
12945 return SDValue();
12946 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
12947 EVT MemVT = Load->getMemoryVT();
12948 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
12949 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
12950 return SDValue();
12951
12952
12953 // If the shift op is SHL, the logic op must be AND, otherwise the result
12954 // will be wrong.
12955 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
12956 return SDValue();
12957
12958 if (!N0.hasOneUse() || !N1.hasOneUse())
12959 return SDValue();
12960
12962 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
12963 ISD::ZERO_EXTEND, SetCCs, TLI))
12964 return SDValue();
12965
12966 // Actually do the transformation.
12967 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
12968 Load->getChain(), Load->getBasePtr(),
12969 Load->getMemoryVT(), Load->getMemOperand());
12970
12971 SDLoc DL1(N1);
12972 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
12973 N1.getOperand(1));
12974
12976 SDLoc DL0(N0);
12977 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
12978 DAG.getConstant(Mask, DL0, VT));
12979
12980 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12981 CombineTo(N, And);
12982 if (SDValue(Load, 0).hasOneUse()) {
12983 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
12984 } else {
12985 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
12986 Load->getValueType(0), ExtLoad);
12987 CombineTo(Load, Trunc, ExtLoad.getValue(1));
12988 }
12989
12990 // N0 is dead at this point.
12991 recursivelyDeleteUnusedNodes(N0.getNode());
12992
12993 return SDValue(N,0); // Return N so it doesn't get rechecked!
12994}
12995
12996/// If we're narrowing or widening the result of a vector select and the final
12997/// size is the same size as a setcc (compare) feeding the select, then try to
12998/// apply the cast operation to the select's operands because matching vector
12999/// sizes for a select condition and other operands should be more efficient.
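/// For example, with a v4i32 setcc feeding a v4i16 vselect whose result is
/// then sign-extended to v4i32:
///   (v4i32 (sext (vselect (setcc a, b), x, y)))
///     --> (vselect (setcc a, b), (v4i32 sext x), (v4i32 sext y))
/// so the select operates at the same width as its condition.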
13000SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13001 unsigned CastOpcode = Cast->getOpcode();
13002 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13003 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13004 CastOpcode == ISD::FP_ROUND) &&
13005 "Unexpected opcode for vector select narrowing/widening");
13006
13007 // We only do this transform before legal ops because the pattern may be
13008 // obfuscated by target-specific operations after legalization. Do not create
13009 // an illegal select op, however, because that may be difficult to lower.
13010 EVT VT = Cast->getValueType(0);
13011 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13012 return SDValue();
13013
13014 SDValue VSel = Cast->getOperand(0);
13015 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13016 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13017 return SDValue();
13018
13019 // Does the setcc have the same vector size as the casted select?
13020 SDValue SetCC = VSel.getOperand(0);
13021 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13022 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13023 return SDValue();
13024
13025 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13026 SDValue A = VSel.getOperand(1);
13027 SDValue B = VSel.getOperand(2);
13028 SDValue CastA, CastB;
13029 SDLoc DL(Cast);
13030 if (CastOpcode == ISD::FP_ROUND) {
13031 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13032 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13033 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13034 } else {
13035 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13036 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13037 }
13038 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13039}
13040
13041// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13042// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13043static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13044 const TargetLowering &TLI, EVT VT,
13045 bool LegalOperations, SDNode *N,
13046 SDValue N0, ISD::LoadExtType ExtLoadType) {
13047 SDNode *N0Node = N0.getNode();
13048 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13049 : ISD::isZEXTLoad(N0Node);
13050 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13051 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13052 return SDValue();
13053
13054 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13055 EVT MemVT = LN0->getMemoryVT();
13056 if ((LegalOperations || !LN0->isSimple() ||
13057 VT.isVector()) &&
13058 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13059 return SDValue();
13060
13061 SDValue ExtLoad =
13062 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13063 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13064 Combiner.CombineTo(N, ExtLoad);
13065 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13066 if (LN0->use_empty())
13067 Combiner.recursivelyDeleteUnusedNodes(LN0);
13068 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13069}
13070
13071// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13072// Only generate vector extloads when 1) they're legal, and 2) they are
13073// deemed desirable by the target. NonNegZExt can be set to true if a zero
13074// extend has the nonneg flag to allow use of sextload if profitable.
13075static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13076 const TargetLowering &TLI, EVT VT,
13077 bool LegalOperations, SDNode *N, SDValue N0,
13078 ISD::LoadExtType ExtLoadType,
13079 ISD::NodeType ExtOpc,
13080 bool NonNegZExt = false) {
13082 return {};
13083
13084 // If this is zext nneg, see if it would make sense to treat it as a sext.
13085 if (NonNegZExt) {
13086 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13087 "Unexpected load type or opcode");
13088 for (SDNode *User : N0->uses()) {
13089 if (User->getOpcode() == ISD::SETCC) {
13090 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13092 ExtLoadType = ISD::SEXTLOAD;
13093 ExtOpc = ISD::SIGN_EXTEND;
13094 break;
13095 }
13096 }
13097 }
13098 }
13099
13100 // TODO: isFixedLengthVector() should be removed; any negative effects on
13101 // code generation would then be the result of that target's implementation
13102 // of isVectorLoadExtDesirable().
13103 if ((LegalOperations || VT.isFixedLengthVector() ||
13104 !cast<LoadSDNode>(N0)->isSimple()) &&
13105 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13106 return {};
13107
13108 bool DoXform = true;
13110 if (!N0.hasOneUse())
13111 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13112 if (VT.isVector())
13113 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13114 if (!DoXform)
13115 return {};
13116
13117 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13118 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13119 LN0->getBasePtr(), N0.getValueType(),
13120 LN0->getMemOperand());
13121 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13122 // If the load value is used only by N, replace it via CombineTo N.
13123 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13124 Combiner.CombineTo(N, ExtLoad);
13125 if (NoReplaceTrunc) {
13126 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13127 Combiner.recursivelyDeleteUnusedNodes(LN0);
13128 } else {
13129 SDValue Trunc =
13130 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13131 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13132 }
13133 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13134}
13135
13136static SDValue
13137tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13138 bool LegalOperations, SDNode *N, SDValue N0,
13139 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13140 if (!N0.hasOneUse())
13141 return SDValue();
13142
13143 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13144 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13145 return SDValue();
13146
13147 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13148 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13149 return SDValue();
13150
13151 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13152 return SDValue();
13153
13154 SDLoc dl(Ld);
13155 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13156 SDValue NewLoad = DAG.getMaskedLoad(
13157 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13158 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13159 ExtLoadType, Ld->isExpandingLoad());
13160 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13161 return NewLoad;
13162}
13163
13164// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13165static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13166 const TargetLowering &TLI, EVT VT,
13167 SDValue N0,
13168 ISD::LoadExtType ExtLoadType) {
13169 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13170 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13171 return {};
13172 EVT MemoryVT = ALoad->getMemoryVT();
13173 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13174 return {};
13175 // Can't fold into ALoad if it is already extending differently.
13176 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13177 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13178 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13179 return {};
13180
13181 EVT OrigVT = ALoad->getValueType(0);
13182 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13183 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13184 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13185 ALoad->getBasePtr(), ALoad->getMemOperand()));
13186 NewALoad->setExtensionType(ExtLoadType);
13188 SDValue(ALoad, 0),
13189 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13190 // Update the chain uses.
13191 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13192 return SDValue(NewALoad, 0);
13193}
13194
13195static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13196 bool LegalOperations) {
13197 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13198 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13199
13200 SDValue SetCC = N->getOperand(0);
13201 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13202 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13203 return SDValue();
13204
13205 SDValue X = SetCC.getOperand(0);
13206 SDValue Ones = SetCC.getOperand(1);
13207 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13208 EVT VT = N->getValueType(0);
13209 EVT XVT = X.getValueType();
13210 // setge X, C is canonicalized to setgt, so we do not need to match that
13211 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13212 // not require the 'not' op.
13213 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13214 // Invert and smear/shift the sign bit:
13215 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13216 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
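// For example, for i32: (setgt X, -1) is true exactly when the sign bit of X
// is clear, so (not X) has its sign bit set exactly when the setcc is true;
// sra by 31 smears that bit to 0/-1 and srl by 31 isolates it to 0/1.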
13217 SDLoc DL(N);
13218 unsigned ShCt = VT.getSizeInBits() - 1;
13219 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13220 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13221 SDValue NotX = DAG.getNOT(DL, X, VT);
13222 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13223 auto ShiftOpcode =
13224 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13225 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13226 }
13227 }
13228 return SDValue();
13229}
13230
13231SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13232 SDValue N0 = N->getOperand(0);
13233 if (N0.getOpcode() != ISD::SETCC)
13234 return SDValue();
13235
13236 SDValue N00 = N0.getOperand(0);
13237 SDValue N01 = N0.getOperand(1);
13238 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13239 EVT VT = N->getValueType(0);
13240 EVT N00VT = N00.getValueType();
13241 SDLoc DL(N);
13242
13243 // Propagate fast-math-flags.
13244 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13245
13246 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13247 // the same size as the compared operands. Try to optimize sext(setcc())
13248 // if this is the case.
13249 if (VT.isVector() && !LegalOperations &&
13250 TLI.getBooleanContents(N00VT) ==
13252 EVT SVT = getSetCCResultType(N00VT);
13253
13254 // If we already have the desired type, don't change it.
13255 if (SVT != N0.getValueType()) {
13256 // We know that the # elements of the results is the same as the
13257 // # elements of the compare (and the # elements of the compare result
13258 // for that matter). Check to see that they are the same size. If so,
13259 // we know that the element size of the sext'd result matches the
13260 // element size of the compare operands.
13261 if (VT.getSizeInBits() == SVT.getSizeInBits())
13262 return DAG.getSetCC(DL, VT, N00, N01, CC);
13263
13264 // If the desired elements are smaller or larger than the source
13265 // elements, we can use a matching integer vector type and then
13266 // truncate/sign extend.
13267 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13268 if (SVT == MatchingVecType) {
13269 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13270 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13271 }
13272 }
13273
13274 // Try to eliminate the sext of a setcc by zexting the compare operands.
13275 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13277 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13278 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13279 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13280
13281 // We have an unsupported narrow vector compare op that would be legal
13282 // if extended to the destination type. See if the compare operands
13283 // can be freely extended to the destination type.
13284 auto IsFreeToExtend = [&](SDValue V) {
13285 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13286 return true;
13287 // Match a simple, non-extended load that can be converted to a
13288 // legal {z/s}ext-load.
13289 // TODO: Allow widening of an existing {z/s}ext-load?
13290 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13291 ISD::isUNINDEXEDLoad(V.getNode()) &&
13292 cast<LoadSDNode>(V)->isSimple() &&
13293 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13294 return false;
13295
13296 // Non-chain users of this value must either be the setcc in this
13297 // sequence or extends that can be folded into the new {z/s}ext-load.
13298 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13299 UI != UE; ++UI) {
13300 // Skip uses of the chain and the setcc.
13301 SDNode *User = *UI;
13302 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13303 continue;
13304 // Extra users must have exactly the same cast we are about to create.
13305 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13306 // is enhanced similarly.
13307 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13308 return false;
13309 }
13310 return true;
13311 };
13312
13313 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13314 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13315 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13316 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13317 }
13318 }
13319 }
13320
13321 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13322 // Here, T can be 1 or -1, depending on the type of the setcc and
13323 // getBooleanContents().
13324 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13325
13326 // To determine the "true" side of the select, we need to know the high bit
13327 // of the value returned by the setcc if it evaluates to true.
13328 // If the type of the setcc is i1, then the true case of the select is just
13329 // sext(i1 1), that is, -1.
13330 // If the type of the setcc is larger (say, i8) then the value of the high
13331 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13332 // of the appropriate width.
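// For example, (i32 (sext (i1 (setcc x, y, cc)))) becomes
// (select (setcc x, y, cc), -1, 0), since an i1 "true" sign-extends to all
// ones.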
13333 SDValue ExtTrueVal = (SetCCWidth == 1)
13334 ? DAG.getAllOnesConstant(DL, VT)
13335 : DAG.getBoolConstant(true, DL, VT, N00VT);
13336 SDValue Zero = DAG.getConstant(0, DL, VT);
13337 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13338 return SCC;
13339
13340 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13341 EVT SetCCVT = getSetCCResultType(N00VT);
13342 // Don't do this transform for i1 because there's a select transform
13343 // that would reverse it.
13344 // TODO: We should not do this transform at all without a target hook
13345 // because a sext is likely cheaper than a select?
13346 if (SetCCVT.getScalarSizeInBits() != 1 &&
13347 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13348 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13349 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13350 }
13351 }
13352
13353 return SDValue();
13354}
13355
13356SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13357 SDValue N0 = N->getOperand(0);
13358 EVT VT = N->getValueType(0);
13359 SDLoc DL(N);
13360
13361 if (VT.isVector())
13362 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13363 return FoldedVOp;
13364
13365 // sext(undef) = 0 because the top bit will all be the same.
13366 if (N0.isUndef())
13367 return DAG.getConstant(0, DL, VT);
13368
13369 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13370 return Res;
13371
13372 // fold (sext (sext x)) -> (sext x)
13373 // fold (sext (aext x)) -> (sext x)
13374 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13375 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13376
13377 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13378 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13382 N0.getOperand(0));
13383
13384 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13385 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13386 SDValue N00 = N0.getOperand(0);
13387 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13388 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13389 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13390 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13391 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13392 }
13393 }
13394
13395 if (N0.getOpcode() == ISD::TRUNCATE) {
13396 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13397 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13398 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13399 SDNode *oye = N0.getOperand(0).getNode();
13400 if (NarrowLoad.getNode() != N0.getNode()) {
13401 CombineTo(N0.getNode(), NarrowLoad);
13402 // CombineTo deleted the truncate, if needed, but not what's under it.
13403 AddToWorklist(oye);
13404 }
13405 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13406 }
13407
13408 // See if the value being truncated is already sign extended. If so, just
13409 // eliminate the trunc/sext pair.
13410 SDValue Op = N0.getOperand(0);
13411 unsigned OpBits = Op.getScalarValueSizeInBits();
13412 unsigned MidBits = N0.getScalarValueSizeInBits();
13413 unsigned DestBits = VT.getScalarSizeInBits();
13414 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13415
13416 if (OpBits == DestBits) {
13417 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13418 // bits, it is already ready.
13419 if (NumSignBits > DestBits-MidBits)
13420 return Op;
13421 } else if (OpBits < DestBits) {
13422 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13423 // bits, just sext from i32.
13424 if (NumSignBits > OpBits-MidBits)
13425 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13426 } else {
13427 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13428 // bits, just truncate to i32.
13429 if (NumSignBits > OpBits-MidBits)
13430 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13431 }
13432
13433 // fold (sext (truncate x)) -> (sextinreg x).
13434 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13435 N0.getValueType())) {
13436 if (OpBits < DestBits)
13437 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13438 else if (OpBits > DestBits)
13439 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13440 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13441 DAG.getValueType(N0.getValueType()));
13442 }
13443 }
13444
13445 // Try to simplify (sext (load x)).
13446 if (SDValue foldedExt =
13447 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13449 return foldedExt;
13450
13451 if (SDValue foldedExt =
13452 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13454 return foldedExt;
13455
13456 // fold (sext (load x)) to multiple smaller sextloads.
13457 // Only on illegal but splittable vectors.
13458 if (SDValue ExtLoad = CombineExtLoad(N))
13459 return ExtLoad;
13460
13461 // Try to simplify (sext (sextload x)).
13462 if (SDValue foldedExt = tryToFoldExtOfExtload(
13463 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13464 return foldedExt;
13465
13466 // Try to simplify (sext (atomic_load x)).
13467 if (SDValue foldedExt =
13468 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13469 return foldedExt;
13470
13471 // fold (sext (and/or/xor (load x), cst)) ->
13472 // (and/or/xor (sextload x), (sext cst))
13473 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13474 isa<LoadSDNode>(N0.getOperand(0)) &&
13475 N0.getOperand(1).getOpcode() == ISD::Constant &&
13476 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13477 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13478 EVT MemVT = LN00->getMemoryVT();
13479 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13480 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13482 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13483 ISD::SIGN_EXTEND, SetCCs, TLI);
13484 if (DoXform) {
13485 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13486 LN00->getChain(), LN00->getBasePtr(),
13487 LN00->getMemoryVT(),
13488 LN00->getMemOperand());
13490 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13491 ExtLoad, DAG.getConstant(Mask, DL, VT));
13492 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13493 bool NoReplaceTruncAnd = !N0.hasOneUse();
13494 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13495 CombineTo(N, And);
13496 // If N0 has multiple uses, change other uses as well.
13497 if (NoReplaceTruncAnd) {
13498 SDValue TruncAnd =
13500 CombineTo(N0.getNode(), TruncAnd);
13501 }
13502 if (NoReplaceTrunc) {
13503 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13504 } else {
13505 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13506 LN00->getValueType(0), ExtLoad);
13507 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13508 }
13509 return SDValue(N,0); // Return N so it doesn't get rechecked!
13510 }
13511 }
13512 }
13513
13514 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13515 return V;
13516
13517 if (SDValue V = foldSextSetcc(N))
13518 return V;
13519
13520 // fold (sext x) -> (zext x) if the sign bit is known zero.
13521 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13522 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13523 DAG.SignBitIsZero(N0)) {
13525 Flags.setNonNeg(true);
13526 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13527 }
13528
13529 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13530 return NewVSel;
13531
13532 // Eliminate this sign extend by doing a negation in the destination type:
13533 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13534 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13538 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13539 return DAG.getNegative(Zext, DL, VT);
13540 }
13541 // Eliminate this sign extend by doing a decrement in the destination type:
13542 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13543 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13547 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13548 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13549 }
13550
13551 // fold sext (not i1 X) -> add (zext i1 X), -1
13552 // TODO: This could be extended to handle bool vectors.
13553 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13554 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13555 TLI.isOperationLegal(ISD::ADD, VT)))) {
13556 // If we can eliminate the 'not', the sext form should be better
13557 if (SDValue NewXor = visitXOR(N0.getNode())) {
13558 // Returning N0 is a form of in-visit replacement that may have
13559 // invalidated N0.
13560 if (NewXor.getNode() == N0.getNode()) {
13561 // Return SDValue here as the xor should have already been replaced in
13562 // this sext.
13563 return SDValue();
13564 }
13565
13566 // Return a new sext with the new xor.
13567 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13568 }
13569
13570 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13571 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13572 }
13573
13574 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13575 return Res;
13576
13577 return SDValue();
13578}
13579
13580/// Given an extending node with a pop-count operand, if the target does not
13581/// support a pop-count in the narrow source type but does support it in the
13582/// destination type, widen the pop-count to the destination type.
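/// For example, on a target with a legal i64 CTPOP but no i16 CTPOP:
///   (i64 (zext (i16 (ctpop X)))) --> (i64 (ctpop (i64 (zext X))))
/// The count is unchanged because zero-extending X adds no set bits.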
13583static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13584 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13585 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13586
13587 SDValue CtPop = Extend->getOperand(0);
13588 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13589 return SDValue();
13590
13591 EVT VT = Extend->getValueType(0);
13592 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13595 return SDValue();
13596
13597 // zext (ctpop X) --> ctpop (zext X)
13598 SDLoc DL(Extend);
13599 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13600 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13601}
13602
13603// If we have (zext (abs X)) where X is a type that will be promoted by type
13604// legalization, convert to (abs (sext X)). But don't extend past a legal type.
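// For example, on a target where i8 is promoted to i32:
//   (i64 (zext (i8 (abs X)))) --> (i64 (zext (i32 (abs (i32 (sext X))))))
// Sign-extending X first preserves the absolute value, and the abs is computed
// in the type i8 would have been promoted to anyway.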
13605static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13606 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13607
13608 EVT VT = Extend->getValueType(0);
13609 if (VT.isVector())
13610 return SDValue();
13611
13612 SDValue Abs = Extend->getOperand(0);
13613 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13614 return SDValue();
13615
13616 EVT AbsVT = Abs.getValueType();
13617 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13618 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13620 return SDValue();
13621
13622 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13623
13624 SDValue SExt =
13625 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13626 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13627 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13628}
13629
13630SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13631 SDValue N0 = N->getOperand(0);
13632 EVT VT = N->getValueType(0);
13633 SDLoc DL(N);
13634
13635 if (VT.isVector())
13636 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13637 return FoldedVOp;
13638
13639 // zext(undef) = 0
13640 if (N0.isUndef())
13641 return DAG.getConstant(0, DL, VT);
13642
13643 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13644 return Res;
13645
13646 // fold (zext (zext x)) -> (zext x)
13647 // fold (zext (aext x)) -> (zext x)
13648 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13650 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13651 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13652 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13653 }
13654
13655 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13656 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13660 N0.getOperand(0));
13661
13662 // fold (zext (truncate x)) -> (zext x) or
13663 // (zext (truncate x)) -> (truncate x)
13664 // This is valid when the truncated bits of x are already zero.
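// For example, if x is an i64 value whose top 48 bits are known zero, then
// (i32 (zext (i16 (trunc x)))) is just (i32 (trunc x)): truncating and
// re-extending cannot change bits that are already zero.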
13665 SDValue Op;
13666 KnownBits Known;
13667 if (isTruncateOf(DAG, N0, Op, Known)) {
13668 APInt TruncatedBits =
13669 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13670 APInt(Op.getScalarValueSizeInBits(), 0) :
13671 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13673 std::min(Op.getScalarValueSizeInBits(),
13674 VT.getScalarSizeInBits()));
13675 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13676 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13677 DAG.salvageDebugInfo(*N0.getNode());
13678
13679 return ZExtOrTrunc;
13680 }
13681 }
13682
13683 // fold (zext (truncate x)) -> (and x, mask)
13684 if (N0.getOpcode() == ISD::TRUNCATE) {
13685 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13686 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13687 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13688 SDNode *oye = N0.getOperand(0).getNode();
13689 if (NarrowLoad.getNode() != N0.getNode()) {
13690 CombineTo(N0.getNode(), NarrowLoad);
13691 // CombineTo deleted the truncate, if needed, but not what's under it.
13692 AddToWorklist(oye);
13693 }
13694 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13695 }
13696
13697 EVT SrcVT = N0.getOperand(0).getValueType();
13698 EVT MinVT = N0.getValueType();
13699
13700 if (N->getFlags().hasNonNeg()) {
13701 SDValue Op = N0.getOperand(0);
13702 unsigned OpBits = SrcVT.getScalarSizeInBits();
13703 unsigned MidBits = MinVT.getScalarSizeInBits();
13704 unsigned DestBits = VT.getScalarSizeInBits();
13705 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13706
13707 if (OpBits == DestBits) {
13708 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13709 // bits, it is already ready.
13710 if (NumSignBits > DestBits - MidBits)
13711 return Op;
13712 } else if (OpBits < DestBits) {
13713 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13714 // bits, just sext from i32.
13715 // FIXME: This can probably be ZERO_EXTEND nneg?
13716 if (NumSignBits > OpBits - MidBits)
13717 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13718 } else {
13719 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13720 // bits, just truncate to i32.
13721 if (NumSignBits > OpBits - MidBits)
13722 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13723 }
13724 }
13725
13726 // Try to mask before the extension to avoid having to generate a larger mask,
13727 // possibly over several sub-vectors.
13728 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13729 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13731 SDValue Op = N0.getOperand(0);
13732 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13733 AddToWorklist(Op.getNode());
13734 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13735 // Transfer the debug info; the new node is equivalent to N0.
13736 DAG.transferDbgValues(N0, ZExtOrTrunc);
13737 return ZExtOrTrunc;
13738 }
13739 }
13740
13741 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13742 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13743 AddToWorklist(Op.getNode());
13744 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13745 // We may safely transfer the debug info describing the truncate node over
13746 // to the equivalent and operation.
13747 DAG.transferDbgValues(N0, And);
13748 return And;
13749 }
13750 }
13751
13752 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13753 // if either of the casts is not free.
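// For example, with x: i64, (i64 (zext (and (i32 (trunc x)), 0xFF))) becomes
// (i64 (and x, 0xFF)); the mask already clears every bit above bit 7, so the
// trunc/zext pair is redundant.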
13754 if (N0.getOpcode() == ISD::AND &&
13755 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13756 N0.getOperand(1).getOpcode() == ISD::Constant &&
13757 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13758 !TLI.isZExtFree(N0.getValueType(), VT))) {
13759 SDValue X = N0.getOperand(0).getOperand(0);
13760 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13762 return DAG.getNode(ISD::AND, DL, VT,
13763 X, DAG.getConstant(Mask, DL, VT));
13764 }
13765
13766 // Try to simplify (zext (load x)).
13767 if (SDValue foldedExt = tryToFoldExtOfLoad(
13768 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13769 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13770 return foldedExt;
13771
13772 if (SDValue foldedExt =
13773 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13774 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13775 return foldedExt;
13776
13777 // fold (zext (load x)) to multiple smaller zextloads.
13778 // Only on illegal but splittable vectors.
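// For instance, a (zext (load <4 x i16>) to <4 x i32>) whose result type must
// be split may instead be emitted as several smaller zextloads whose results
// are concatenated; this is a sketch of the idea, the exact splitting is
// decided in CombineExtLoad.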
13779 if (SDValue ExtLoad = CombineExtLoad(N))
13780 return ExtLoad;
13781
13782 // Try to simplify (zext (atomic_load x)).
13783 if (SDValue foldedExt =
13784 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13785 return foldedExt;
13786
13787 // fold (zext (and/or/xor (load x), cst)) ->
13788 // (and/or/xor (zextload x), (zext cst))
13789 // Unless (and (load x) cst) will match as a zextload already and has
13790 // additional users, or the zext is already free.
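// e.g. (zext (and (i16 load p), 7) to i32) -> (and (i32 zextload p from i16), 7),
// so the extension is folded into the load instead of being a separate node.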
13791 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13792 isa<LoadSDNode>(N0.getOperand(0)) &&
13793 N0.getOperand(1).getOpcode() == ISD::Constant &&
13794 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13795 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13796 EVT MemVT = LN00->getMemoryVT();
13797 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13798 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13799 bool DoXform = true;
13800 SmallVector<SDNode *, 4> SetCCs;
13801 if (!N0.hasOneUse()) {
13802 if (N0.getOpcode() == ISD::AND) {
13803 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13804 EVT LoadResultTy = AndC->getValueType(0);
13805 EVT ExtVT;
13806 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13807 DoXform = false;
13808 }
13809 }
13810 if (DoXform)
13811 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13812 ISD::ZERO_EXTEND, SetCCs, TLI);
13813 if (DoXform) {
13814 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13815 LN00->getChain(), LN00->getBasePtr(),
13816 LN00->getMemoryVT(),
13817 LN00->getMemOperand());
13818 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13819 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13820 ExtLoad, DAG.getConstant(Mask, DL, VT));
13821 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13822 bool NoReplaceTruncAnd = !N0.hasOneUse();
13823 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13824 CombineTo(N, And);
13825 // If N0 has multiple uses, change other uses as well.
13826 if (NoReplaceTruncAnd) {
13827 SDValue TruncAnd =
13828 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13829 CombineTo(N0.getNode(), TruncAnd);
13830 }
13831 if (NoReplaceTrunc) {
13832 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13833 } else {
13834 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13835 LN00->getValueType(0), ExtLoad);
13836 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13837 }
13838 return SDValue(N,0); // Return N so it doesn't get rechecked!
13839 }
13840 }
13841 }
13842
13843 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13844 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13845 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13846 return ZExtLoad;
13847
13848 // Try to simplify (zext (zextload x)).
13849 if (SDValue foldedExt = tryToFoldExtOfExtload(
13850 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13851 return foldedExt;
13852
13853 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13854 return V;
13855
13856 if (N0.getOpcode() == ISD::SETCC) {
13857 // Propagate fast-math-flags.
13858 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13859
13860 // Only do this before legalize for now.
13861 if (!LegalOperations && VT.isVector() &&
13862 N0.getValueType().getVectorElementType() == MVT::i1) {
13863 EVT N00VT = N0.getOperand(0).getValueType();
13864 if (getSetCCResultType(N00VT) == N0.getValueType())
13865 return SDValue();
13866
13867 // We know that the # elements of the result is the same as the #
13868 // elements of the compare (and the # elements of the compare result for
13869 // that matter). Check to see that they are the same size. If so, we know
13870 // that the element size of the extended result matches the element size
13871 // of the compare operands.
13872 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13873 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13874 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13875 N0.getOperand(1), N0.getOperand(2));
13876 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13877 }
13878
13879 // If the desired elements are smaller or larger than the source
13880 // elements we can use a matching integer vector type and then
13881 // truncate/any extend followed by zext_in_reg.
13882 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13883 SDValue VsetCC =
13884 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13885 N0.getOperand(1), N0.getOperand(2));
13886 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13887 N0.getValueType());
13888 }
13889
13890 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13891 EVT N0VT = N0.getValueType();
13892 EVT N00VT = N0.getOperand(0).getValueType();
13893 if (SDValue SCC = SimplifySelectCC(
13894 DL, N0.getOperand(0), N0.getOperand(1),
13895 DAG.getBoolConstant(true, DL, N0VT, N00VT),
13896 DAG.getBoolConstant(false, DL, N0VT, N00VT),
13897 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13898 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13899 }
13900
13901 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
13902 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13903 !TLI.isZExtFree(N0, VT)) {
13904 SDValue ShVal = N0.getOperand(0);
13905 SDValue ShAmt = N0.getOperand(1);
13906 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13907 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13908 if (N0.getOpcode() == ISD::SHL) {
13909 // If the original shl may be shifting out bits, do not perform this
13910 // transformation.
13911 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13912 ShVal.getOperand(0).getValueSizeInBits();
13913 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13914 // If the shift is too large, then see if we can deduce that the
13915 // shift is safe anyway.
13916 // Create a mask that has ones for the bits being shifted out.
13917 APInt ShiftOutMask =
13918 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
13919 ShAmtC->getAPIntValue().getZExtValue());
13920
13921 // Check if the bits being shifted out are known to be zero.
13922 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
13923 return SDValue();
13924 }
13925 }
13926
13927 // Ensure that the shift amount is wide enough for the shifted value.
13928 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13929 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13930
13931 return DAG.getNode(N0.getOpcode(), DL, VT,
13932 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13933 }
13934 }
13935 }
13936
13937 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13938 return NewVSel;
13939
13940 if (SDValue NewCtPop = widenCtPop(N, DAG))
13941 return NewCtPop;
13942
13943 if (SDValue V = widenAbs(N, DAG))
13944 return V;
13945
13946 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13947 return Res;
13948
13949 // CSE zext nneg with sext if the zext is not free.
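// For a value known to be non-negative, zext nneg and sext produce the same
// result, so if an equivalent sign_extend node already exists we can simply
// reuse it rather than keep both forms alive.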
13950 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
13951 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
13952 if (CSENode)
13953 return SDValue(CSENode, 0);
13954 }
13955
13956 return SDValue();
13957}
13958
13959SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13960 SDValue N0 = N->getOperand(0);
13961 EVT VT = N->getValueType(0);
13962 SDLoc DL(N);
13963
13964 // aext(undef) = undef
13965 if (N0.isUndef())
13966 return DAG.getUNDEF(VT);
13967
13968 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13969 return Res;
13970
13971 // fold (aext (aext x)) -> (aext x)
13972 // fold (aext (zext x)) -> (zext x)
13973 // fold (aext (sext x)) -> (sext x)
13974 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
13975 N0.getOpcode() == ISD::SIGN_EXTEND) {
13976 SDNodeFlags Flags;
13977 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13978 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13979 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
13980 }
13981
13982 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
13983 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13984 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13985 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13986 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13987 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13988 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
13989
13990 // fold (aext (truncate (load x))) -> (aext (smaller load x))
13991 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
13992 if (N0.getOpcode() == ISD::TRUNCATE) {
13993 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13994 SDNode *oye = N0.getOperand(0).getNode();
13995 if (NarrowLoad.getNode() != N0.getNode()) {
13996 CombineTo(N0.getNode(), NarrowLoad);
13997 // CombineTo deleted the truncate, if needed, but not what's under it.
13998 AddToWorklist(oye);
13999 }
14000 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14001 }
14002 }
14003
14004 // fold (aext (truncate x))
14005 if (N0.getOpcode() == ISD::TRUNCATE)
14006 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14007
14008 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14009 // if the trunc is not free.
14010 if (N0.getOpcode() == ISD::AND &&
14011 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14012 N0.getOperand(1).getOpcode() == ISD::Constant &&
14013 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14014 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14015 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14016 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14017 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14018 }
14019
14020 // fold (aext (load x)) -> (aext (truncate (extload x)))
14021 // None of the supported targets knows how to perform load and any_ext
14022 // on vectors in one instruction, so attempt to fold to zext instead.
14023 if (VT.isVector()) {
14024 // Try to simplify (zext (load x)).
14025 if (SDValue foldedExt =
14026 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14027 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14028 return foldedExt;
14029 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14030 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14031 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14032 bool DoXform = true;
14033 SmallVector<SDNode *, 4> SetCCs;
14034 if (!N0.hasOneUse())
14035 DoXform =
14036 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14037 if (DoXform) {
14038 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14039 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14040 LN0->getBasePtr(), N0.getValueType(),
14041 LN0->getMemOperand());
14042 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14043 // If the load value is used only by N, replace it via CombineTo N.
14044 bool NoReplaceTrunc = N0.hasOneUse();
14045 CombineTo(N, ExtLoad);
14046 if (NoReplaceTrunc) {
14047 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14048 recursivelyDeleteUnusedNodes(LN0);
14049 } else {
14050 SDValue Trunc =
14051 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14052 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14053 }
14054 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14055 }
14056 }
14057
14058 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14059 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14060 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14061 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14062 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14063 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14064 ISD::LoadExtType ExtType = LN0->getExtensionType();
14065 EVT MemVT = LN0->getMemoryVT();
14066 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14067 SDValue ExtLoad =
14068 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14069 MemVT, LN0->getMemOperand());
14070 CombineTo(N, ExtLoad);
14071 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14072 recursivelyDeleteUnusedNodes(LN0);
14073 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14074 }
14075 }
14076
14077 if (N0.getOpcode() == ISD::SETCC) {
14078 // Propagate fast-math-flags.
14079 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14080
14081 // For vectors:
14082 // aext(setcc) -> vsetcc
14083 // aext(setcc) -> truncate(vsetcc)
14084 // aext(setcc) -> aext(vsetcc)
14085 // Only do this before legalize for now.
14086 if (VT.isVector() && !LegalOperations) {
14087 EVT N00VT = N0.getOperand(0).getValueType();
14088 if (getSetCCResultType(N00VT) == N0.getValueType())
14089 return SDValue();
14090
14091 // We know that the # elements of the result is the same as the
14092 // # elements of the compare (and the # elements of the compare result
14093 // for that matter). Check to see that they are the same size. If so,
14094 // we know that the element size of the extended result matches the
14095 // element size of the compare operands.
14096 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14097 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14098 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14099
14100 // If the desired elements are smaller or larger than the source
14101 // elements we can use a matching integer vector type and then
14102 // truncate/any extend
14103 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14104 SDValue VsetCC = DAG.getSetCC(
14105 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14106 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14107 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14108 }
14109
14110 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14111 if (SDValue SCC = SimplifySelectCC(
14112 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14113 DAG.getConstant(0, DL, VT),
14114 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14115 return SCC;
14116 }
14117
14118 if (SDValue NewCtPop = widenCtPop(N, DAG))
14119 return NewCtPop;
14120
14121 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14122 return Res;
14123
14124 return SDValue();
14125}
14126
14127SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14128 unsigned Opcode = N->getOpcode();
14129 SDValue N0 = N->getOperand(0);
14130 SDValue N1 = N->getOperand(1);
14131 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14132
14133 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14134 if (N0.getOpcode() == Opcode &&
14135 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14136 return N0;
14137
14138 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14139 N0.getOperand(0).getOpcode() == Opcode) {
14140 // We have an assert, truncate, assert sandwich. Make one stronger assert
14141 // by applying the smaller of the two asserted types to the larger source
14142 // value. This eliminates the later assert:
14143 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14144 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14145 SDLoc DL(N);
14146 SDValue BigA = N0.getOperand(0);
14147 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14148 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14149 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14150 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14151 BigA.getOperand(0), MinAssertVTVal);
14152 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14153 }
14154
14155 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14156 // than X, just move the AssertZext in front of the truncate and drop the
14157 // AssertSext.
14158 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14159 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14160 Opcode == ISD::AssertZext) {
14161 SDValue BigA = N0.getOperand(0);
14162 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14163 if (AssertVT.bitsLT(BigA_AssertVT)) {
14164 SDLoc DL(N);
14165 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14166 BigA.getOperand(0), N1);
14167 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14168 }
14169 }
14170
14171 return SDValue();
14172}
14173
14174SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14175 SDLoc DL(N);
14176
14177 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14178 SDValue N0 = N->getOperand(0);
14179
14180 // Fold (assertalign (assertalign x, AL0), AL1) ->
14181 // (assertalign x, max(AL0, AL1))
14182 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14183 return DAG.getAssertAlign(DL, N0.getOperand(0),
14184 std::max(AL, AAN->getAlign()));
14185
14186 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14187 // this assert down to the source operands so that those arithmetic ops can
14188 // be exposed to DAG combining.
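// For example, in (assertalign (add Ptr, 32), 16) the constant 32 already has
// the four trailing zero bits required for 16-byte alignment, so the assert is
// reattached to Ptr as (add (assertalign Ptr, 16), 32), exposing the add to
// further combines.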
14189 switch (N0.getOpcode()) {
14190 default:
14191 break;
14192 case ISD::ADD:
14193 case ISD::SUB: {
14194 unsigned AlignShift = Log2(AL);
14195 SDValue LHS = N0.getOperand(0);
14196 SDValue RHS = N0.getOperand(1);
14197 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14198 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14199 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14200 if (LHSAlignShift < AlignShift)
14201 LHS = DAG.getAssertAlign(DL, LHS, AL);
14202 if (RHSAlignShift < AlignShift)
14203 RHS = DAG.getAssertAlign(DL, RHS, AL);
14204 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14205 }
14206 break;
14207 }
14208 }
14209
14210 return SDValue();
14211}
14212
14213/// If the result of a load is shifted/masked/truncated to an effectively
14214/// narrower type, try to transform the load to a narrower type and/or
14215/// use an extending load.
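/// For example, on a little-endian target (i32 (trunc (srl (i64 (load p)), 32)))
/// can be turned into (i32 (load p+4)): only the four bytes that survive the
/// shift and truncate are actually read.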
14216SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14217 unsigned Opc = N->getOpcode();
14218
14220 SDValue N0 = N->getOperand(0);
14221 EVT VT = N->getValueType(0);
14222 EVT ExtVT = VT;
14223
14224 // This transformation isn't valid for vector loads.
14225 if (VT.isVector())
14226 return SDValue();
14227
14228 // The ShAmt variable is used to indicate that we've consumed a right
14229 // shift. I.e. we want to narrow the width of the load by skipping the ShAmt
14230 // least significant bits.
14231 unsigned ShAmt = 0;
14232 // A special case is when the least significant bits from the load are masked
14233 // away, but using an AND rather than a right shift. ShiftedOffset is used
14234 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
14235 // the result.
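// e.g. for (and (i32 load p), 0xFF00) on a little-endian target, the narrowed
// load reads only the byte at p+1 and the result is shifted left by 8 to put
// it back in its original bit position.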
14236 unsigned ShiftedOffset = 0;
14237 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14238 // extended to VT.
14239 if (Opc == ISD::SIGN_EXTEND_INREG) {
14240 ExtType = ISD::SEXTLOAD;
14241 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14242 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14243 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14244 // value, or it may be shifting a higher subword, half or byte into the
14245 // lowest bits.
14246
14247 // Only handle shift with constant shift amount, and the shiftee must be a
14248 // load.
14249 auto *LN = dyn_cast<LoadSDNode>(N0);
14250 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14251 if (!N1C || !LN)
14252 return SDValue();
14253 // If the shift amount is larger than the memory type then we're not
14254 // accessing any of the loaded bytes.
14255 ShAmt = N1C->getZExtValue();
14256 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14257 if (MemoryWidth <= ShAmt)
14258 return SDValue();
14259 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14260 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14261 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14262 // If the original load is a SEXTLOAD then we can't simply replace it by a
14263 // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD
14264 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14265 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14266 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14267 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14268 LN->getExtensionType() != ExtType)
14269 return SDValue();
14270 } else if (Opc == ISD::AND) {
14271 // An AND with a constant mask is the same as a truncate + zero-extend.
14272 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14273 if (!AndC)
14274 return SDValue();
14275
14276 const APInt &Mask = AndC->getAPIntValue();
14277 unsigned ActiveBits = 0;
14278 if (Mask.isMask()) {
14279 ActiveBits = Mask.countr_one();
14280 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14281 ShiftedOffset = ShAmt;
14282 } else {
14283 return SDValue();
14284 }
14285
14286 ExtType = ISD::ZEXTLOAD;
14287 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14288 }
14289
14290 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14291 // a right shift. Here we redo some of those checks, to possibly adjust the
14292 // ExtVT even further based on "a masking AND". We could also end up here for
14293 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14294 // need to be done here as well.
14295 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14296 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14297 // Bail out when the SRL has more than one use. This is done for historical
14298 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14299 // check below? And maybe it could be non-profitable to do the transform when
14300 // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
14301 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14302 if (!SRL.hasOneUse())
14303 return SDValue();
14304
14305 // Only handle shift with constant shift amount, and the shiftee must be a
14306 // load.
14307 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14308 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14309 if (!SRL1C || !LN)
14310 return SDValue();
14311
14312 // If the shift amount is larger than the input type then we're not
14313 // accessing any of the loaded bytes. If the load was a zextload/extload
14314 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14315 ShAmt = SRL1C->getZExtValue();
14316 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14317 if (ShAmt >= MemoryWidth)
14318 return SDValue();
14319
14320 // Because a SRL must be assumed to *need* to zero-extend the high bits
14321 // (as opposed to anyext the high bits), we can't combine the zextload
14322 // lowering of SRL and an sextload.
14323 if (LN->getExtensionType() == ISD::SEXTLOAD)
14324 return SDValue();
14325
14326 // Avoid reading outside the memory accessed by the original load (could
14327 // happen if we only adjusted the load base pointer by ShAmt). Instead we
14328 // try to narrow the load even further. The typical scenario here is:
14329 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14330 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14331 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14332 // Don't replace sextload by zextload.
14333 if (ExtType == ISD::SEXTLOAD)
14334 return SDValue();
14335 // Narrow the load.
14336 ExtType = ISD::ZEXTLOAD;
14337 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14338 }
14339
14340 // If the SRL is only used by a masking AND, we may be able to adjust
14341 // the ExtVT to make the AND redundant.
14342 SDNode *Mask = *(SRL->use_begin());
14343 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14344 isa<ConstantSDNode>(Mask->getOperand(1))) {
14345 unsigned Offset, ActiveBits;
14346 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14347 if (ShiftMask.isMask()) {
14348 EVT MaskedVT =
14349 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14350 // If the mask is smaller, recompute the type.
14351 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14352 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14353 ExtVT = MaskedVT;
14354 } else if (ExtType == ISD::ZEXTLOAD &&
14355 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14356 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14357 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14358 // If the mask is shifted we can use a narrower load and a shl to insert
14359 // the trailing zeros.
14360 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14361 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14362 ExtVT = MaskedVT;
14363 ShAmt = Offset + ShAmt;
14364 ShiftedOffset = Offset;
14365 }
14366 }
14367 }
14368
14369 N0 = SRL.getOperand(0);
14370 }
14371
14372 // If the load is shifted left (and the result isn't shifted back right), we
14373 // can fold a truncate through the shift. The typical scenario is that N
14374 // points at a TRUNCATE here so the attempted fold is:
14375 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14376 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14377 unsigned ShLeftAmt = 0;
14378 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14379 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14380 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14381 ShLeftAmt = N01->getZExtValue();
14382 N0 = N0.getOperand(0);
14383 }
14384 }
14385
14386 // If we haven't found a load, we can't narrow it.
14387 if (!isa<LoadSDNode>(N0))
14388 return SDValue();
14389
14390 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14391 // Reducing the width of a volatile load is illegal. For atomics, we may be
14392 // able to reduce the width provided we never widen again. (see D66309)
14393 if (!LN0->isSimple() ||
14394 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14395 return SDValue();
14396
14397 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14398 unsigned LVTStoreBits =
14399 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14400 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14401 return LVTStoreBits - EVTStoreBits - ShAmt;
14402 };
14403
14404 // We need to adjust the pointer to the load by ShAmt bits in order to load
14405 // the correct bytes.
14406 unsigned PtrAdjustmentInBits =
14407 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14408
14409 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14410 SDLoc DL(LN0);
14411 // The original load itself didn't wrap, so an offset within it doesn't.
14412 SDNodeFlags Flags;
14413 Flags.setNoUnsignedWrap(true);
14414 SDValue NewPtr = DAG.getMemBasePlusOffset(
14415 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14416 AddToWorklist(NewPtr.getNode());
14417
14418 SDValue Load;
14419 if (ExtType == ISD::NON_EXTLOAD)
14420 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14421 LN0->getPointerInfo().getWithOffset(PtrOff),
14422 LN0->getOriginalAlign(),
14423 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14424 else
14425 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14426 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14427 LN0->getOriginalAlign(),
14428 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14429
14430 // Replace the old load's chain with the new load's chain.
14431 WorklistRemover DeadNodes(*this);
14432 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14433
14434 // Shift the result left, if we've swallowed a left shift.
14435 SDValue Result = Load;
14436 if (ShLeftAmt != 0) {
14437 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14438 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14439 ShImmTy = VT;
14440 // If the shift amount is as large as the result size (but, presumably,
14441 // no larger than the source) then the useful bits of the result are
14442 // zero; we can't simply return the shortened shift, because the result
14443 // of that operation is undefined.
14444 if (ShLeftAmt >= VT.getScalarSizeInBits())
14445 Result = DAG.getConstant(0, DL, VT);
14446 else
14447 Result = DAG.getNode(ISD::SHL, DL, VT,
14448 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14449 }
14450
14451 if (ShiftedOffset != 0) {
14452 // We're using a shifted mask, so the load now has an offset. This means
14453 // that the data has been loaded into lower bits than it otherwise would
14454 // have been, so we need to shl the loaded data into the correct position
14455 // in the register.
14456 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14457 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14458 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14459 }
14460
14461 // Return the new loaded value.
14462 return Result;
14463}
14464
14465SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14466 SDValue N0 = N->getOperand(0);
14467 SDValue N1 = N->getOperand(1);
14468 EVT VT = N->getValueType(0);
14469 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14470 unsigned VTBits = VT.getScalarSizeInBits();
14471 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14472
14473 // sext_in_reg(undef) = 0 because the top bits will all be the same.
14474 if (N0.isUndef())
14475 return DAG.getConstant(0, SDLoc(N), VT);
14476
14477 // fold (sext_in_reg c1) -> c1
14478 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14479 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14480
14481 // If the input is already sign extended, just drop the extension.
14482 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14483 return N0;
14484
14485 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14486 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14487 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14488 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14489 N1);
14490
14491 // fold (sext_in_reg (sext x)) -> (sext x)
14492 // fold (sext_in_reg (aext x)) -> (sext x)
14493 // if x is small enough or if we know that x has more than 1 sign bit and the
14494 // sign_extend_inreg is extending from one of them.
14495 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14496 SDValue N00 = N0.getOperand(0);
14497 unsigned N00Bits = N00.getScalarValueSizeInBits();
14498 if ((N00Bits <= ExtVTBits ||
14499 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14500 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14501 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14502 }
14503
14504 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14505 // if x is small enough or if we know that x has more than 1 sign bit and the
14506 // sign_extend_inreg is extending from one of them.
14507 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14508 SDValue N00 = N0.getOperand(0);
14509 unsigned N00Bits = N00.getScalarValueSizeInBits();
14510 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14511 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14512 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14513 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14514 if ((N00Bits == ExtVTBits ||
14515 (!IsZext && (N00Bits < ExtVTBits ||
14516 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14517 (!LegalOperations ||
14518 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14519 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14520 }
14521
14522 // fold (sext_in_reg (zext x)) -> (sext x)
14523 // iff we are extending the source sign bit.
14524 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14525 SDValue N00 = N0.getOperand(0);
14526 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14527 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14528 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14529 }
14530
14531 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14532 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14533 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14534
14535 // fold operands of sext_in_reg based on knowledge that the top bits are not
14536 // demanded.
14537 if (SimplifyDemandedBits(SDValue(N, 0)))
14538 return SDValue(N, 0);
14539
14540 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14541 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14542 if (SDValue NarrowLoad = reduceLoadWidth(N))
14543 return NarrowLoad;
14544
14545 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14546 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14547 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14548 if (N0.getOpcode() == ISD::SRL) {
14549 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14550 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14551 // We can turn this into an SRA iff the input to the SRL is already sign
14552 // extended enough.
14553 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14554 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14555 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14556 N0.getOperand(1));
14557 }
14558 }
14559
14560 // fold (sext_inreg (extload x)) -> (sextload x)
14561 // If sextload is not supported by target, we can only do the combine when
14562 // load has one use. Doing otherwise can block folding the extload with other
14563 // extends that the target does support.
14564 if (ISD::isEXTLoad(N0.getNode()) &&
14565 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14566 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14567 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14568 N0.hasOneUse()) ||
14569 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14570 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14571 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14572 LN0->getChain(),
14573 LN0->getBasePtr(), ExtVT,
14574 LN0->getMemOperand());
14575 CombineTo(N, ExtLoad);
14576 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14577 AddToWorklist(ExtLoad.getNode());
14578 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14579 }
14580
14581 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14582 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14583 N0.hasOneUse() &&
14584 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14585 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14586 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14587 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14588 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14589 LN0->getChain(),
14590 LN0->getBasePtr(), ExtVT,
14591 LN0->getMemOperand());
14592 CombineTo(N, ExtLoad);
14593 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14594 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14595 }
14596
14597 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14598 // ignore it if the masked load is already sign extended
14599 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14600 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14601 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14602 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14603 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14604 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14605 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14606 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14607 CombineTo(N, ExtMaskedLoad);
14608 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14609 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14610 }
14611 }
14612
14613 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14614 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14615 if (SDValue(GN0, 0).hasOneUse() &&
14616 ExtVT == GN0->getMemoryVT() &&
14617 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
14618 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14619 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14620
14621 SDValue ExtLoad = DAG.getMaskedGather(
14622 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14623 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14624
14625 CombineTo(N, ExtLoad);
14626 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14627 AddToWorklist(ExtLoad.getNode());
14628 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14629 }
14630 }
14631
14632 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14633 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14634 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14635 N0.getOperand(1), false))
14636 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14637 }
14638
14639 // Fold (iM_signext_inreg
14640 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14641 // from iN)
14642 // -> (extract_subvector (signext iN_v to iM))
14643 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14644 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14645 SDValue InnerExt = N0.getOperand(0);
14646 EVT InnerExtVT = InnerExt->getValueType(0);
14647 SDValue Extendee = InnerExt->getOperand(0);
14648
14649 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14650 (!LegalOperations ||
14651 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14652 SDValue SignExtExtendee =
14653 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14654 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14655 N0.getOperand(1));
14656 }
14657 }
14658
14659 return SDValue();
14660}
14661
14662 static SDValue foldExtendVectorInregToExtendOfSubvector(
14663 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14664 bool LegalOperations) {
14665 unsigned InregOpcode = N->getOpcode();
14666 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14667
14668 SDValue Src = N->getOperand(0);
14669 EVT VT = N->getValueType(0);
14670 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14671 Src.getValueType().getVectorElementType(),
14672 VT.getVectorElementCount());
14673
14674 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14675 "Expected EXTEND_VECTOR_INREG dag node in input!");
14676
14677 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14678 // FIXME: one-use check may be overly restrictive
14679 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14680 return SDValue();
14681
14682 // Profitability check: we must be extending exactly one of its operands.
14683 // FIXME: this is probably overly restrictive.
14684 Src = Src.getOperand(0);
14685 if (Src.getValueType() != SrcVT)
14686 return SDValue();
14687
14688 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14689 return SDValue();
14690
14691 return DAG.getNode(Opcode, DL, VT, Src);
14692}
14693
14694SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14695 SDValue N0 = N->getOperand(0);
14696 EVT VT = N->getValueType(0);
14697 SDLoc DL(N);
14698
14699 if (N0.isUndef()) {
14700 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14701 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14702 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14703 ? DAG.getUNDEF(VT)
14704 : DAG.getConstant(0, DL, VT);
14705 }
14706
14707 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14708 return Res;
14709
14710 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14711 return SDValue(N, 0);
14712
14713 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14714 LegalOperations))
14715 return R;
14716
14717 return SDValue();
14718}
14719
14720SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14721 SDValue N0 = N->getOperand(0);
14722 EVT VT = N->getValueType(0);
14723 EVT SrcVT = N0.getValueType();
14724 bool isLE = DAG.getDataLayout().isLittleEndian();
14725 SDLoc DL(N);
14726
14727 // trunc(undef) = undef
14728 if (N0.isUndef())
14729 return DAG.getUNDEF(VT);
14730
14731 // fold (truncate (truncate x)) -> (truncate x)
14732 if (N0.getOpcode() == ISD::TRUNCATE)
14733 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14734
14735 // fold (truncate c1) -> c1
14736 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14737 return C;
14738
14739 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14740 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14741 N0.getOpcode() == ISD::SIGN_EXTEND ||
14742 N0.getOpcode() == ISD::ANY_EXTEND) {
14743 // if the source is smaller than the dest, we still need an extend.
14744 if (N0.getOperand(0).getValueType().bitsLT(VT))
14745 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14746 // if the source is larger than the dest, then we just need the truncate.
14747 if (N0.getOperand(0).getValueType().bitsGT(VT))
14748 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14749 // if the source and dest are the same type, we can drop both the extend
14750 // and the truncate.
14751 return N0.getOperand(0);
14752 }
14753
14754 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14755 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14756 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14757 N0.hasOneUse()) {
14758 SDValue X = N0.getOperand(0);
14759 SDValue ExtVal = N0.getOperand(1);
14760 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14761 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14762 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14763 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14764 }
14765 }
14766
14767 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14768 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14769 return SDValue();
14770
14771 // Fold extract-and-trunc into a narrow extract. For example:
14772 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14773 // i32 y = TRUNCATE(i64 x)
14774 // -- becomes --
14775 // v16i8 b = BITCAST (v2i64 val)
14776 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14777 //
14778 // Note: We only run this optimization after type legalization (which often
14779 // creates this pattern) and before operation legalization after which
14780 // we need to be more careful about the vector instructions that we generate.
14781 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14782 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14783 EVT VecTy = N0.getOperand(0).getValueType();
14784 EVT ExTy = N0.getValueType();
14785 EVT TrTy = N->getValueType(0);
14786
14787 auto EltCnt = VecTy.getVectorElementCount();
14788 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14789 auto NewEltCnt = EltCnt * SizeRatio;
14790
14791 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14792 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14793
14794 SDValue EltNo = N0->getOperand(1);
14795 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14796 int Elt = EltNo->getAsZExtVal();
14797 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14798 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14799 DAG.getBitcast(NVT, N0.getOperand(0)),
14800 DAG.getVectorIdxConstant(Index, DL));
14801 }
14802 }
14803
14804 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14805 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14806 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14807 TLI.isTruncateFree(SrcVT, VT)) {
14808 SDLoc SL(N0);
14809 SDValue Cond = N0.getOperand(0);
14810 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14811 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14812 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
14813 }
14814 }
14815
14816 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
14817 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14818 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14819 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14820 SDValue Amt = N0.getOperand(1);
14821 KnownBits Known = DAG.computeKnownBits(Amt);
14822 unsigned Size = VT.getScalarSizeInBits();
14823 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14824 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14825 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14826 if (AmtVT != Amt.getValueType()) {
14827 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
14828 AddToWorklist(Amt.getNode());
14829 }
14830 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
14831 }
14832 }
14833
14834 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
14835 return V;
14836
14837 if (SDValue ABD = foldABSToABD(N, DL))
14838 return ABD;
14839
14840 // Attempt to pre-truncate BUILD_VECTOR sources.
14841 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14842 N0.hasOneUse() &&
14843 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14844 // Avoid creating illegal types if running after type legalizer.
14845 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14846 EVT SVT = VT.getScalarType();
14847 SmallVector<SDValue, 8> TruncOps;
14848 for (const SDValue &Op : N0->op_values()) {
14849 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14850 TruncOps.push_back(TruncOp);
14851 }
14852 return DAG.getBuildVector(VT, DL, TruncOps);
14853 }
14854
14855 // trunc (splat_vector x) -> splat_vector (trunc x)
14856 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14857 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14858 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14859 EVT SVT = VT.getScalarType();
14860 return DAG.getSplatVector(
14861 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14862 }
14863
14864 // Fold a series of buildvector, bitcast, and truncate if possible.
14865 // For example fold
14866 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14867 // (2xi32 (buildvector x, y)).
14868 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14869 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14870 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14871 N0.getOperand(0).hasOneUse()) {
14872 SDValue BuildVect = N0.getOperand(0);
14873 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14874 EVT TruncVecEltTy = VT.getVectorElementType();
14875
14876 // Check that the element types match.
14877 if (BuildVectEltTy == TruncVecEltTy) {
14878 // Now we only need to compute the offset of the truncated elements.
14879 unsigned BuildVecNumElts = BuildVect.getNumOperands();
14880 unsigned TruncVecNumElts = VT.getVectorNumElements();
14881 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14882
14883 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14884 "Invalid number of elements");
14885
14886 SmallVector<SDValue, 8> Opnds;
14887 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14888 Opnds.push_back(BuildVect.getOperand(i));
14889
14890 return DAG.getBuildVector(VT, DL, Opnds);
14891 }
14892 }
14893
14894 // fold (truncate (load x)) -> (smaller load x)
14895 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14896 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14897 if (SDValue Reduced = reduceLoadWidth(N))
14898 return Reduced;
14899
14900 // Handle the case where the truncated result is at least as wide as the
14901 // loaded type.
14902 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14903 auto *LN0 = cast<LoadSDNode>(N0);
14904 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14905 SDValue NewLoad = DAG.getExtLoad(
14906 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14907 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14908 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14909 return NewLoad;
14910 }
14911 }
14912 }
14913
14914 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
14915 // where ... are all 'undef'.
14916 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14917 SmallVector<EVT, 8> VTs;
14918 SDValue V;
14919 unsigned Idx = 0;
14920 unsigned NumDefs = 0;
14921
14922 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14923 SDValue X = N0.getOperand(i);
14924 if (!X.isUndef()) {
14925 V = X;
14926 Idx = i;
14927 NumDefs++;
14928 }
14929 // Stop if more than one member is non-undef.
14930 if (NumDefs > 1)
14931 break;
14932
14933 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14934 VT.getVectorElementType(),
14935 X.getValueType().getVectorElementCount()));
14936 }
14937
14938 if (NumDefs == 0)
14939 return DAG.getUNDEF(VT);
14940
14941 if (NumDefs == 1) {
14942 assert(V.getNode() && "The single defined operand is empty!");
14943 SmallVector<SDValue, 8> Opnds;
14944 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14945 if (i != Idx) {
14946 Opnds.push_back(DAG.getUNDEF(VTs[i]));
14947 continue;
14948 }
14949 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14950 AddToWorklist(NV.getNode());
14951 Opnds.push_back(NV);
14952 }
14953 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
14954 }
14955 }
14956
14957 // Fold truncate of a bitcast of a vector to an extract of the low vector
14958 // element.
14959 //
14960 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14961 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14962 SDValue VecSrc = N0.getOperand(0);
14963 EVT VecSrcVT = VecSrc.getValueType();
14964 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14965 (!LegalOperations ||
14966 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14967 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14968 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
14969 DAG.getVectorIdxConstant(Idx, DL));
14970 }
14971 }
14972
14973 // Simplify the operands using demanded-bits information.
14974 if (SimplifyDemandedBits(SDValue(N, 0)))
14975 return SDValue(N, 0);
14976
14977 // fold (truncate (extract_subvector(ext x))) ->
14978 // (extract_subvector x)
14979 // TODO: This can be generalized to cover cases where the truncate and extract
14980 // do not fully cancel each other out.
14981 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14982 SDValue N00 = N0.getOperand(0);
14983 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14984 N00.getOpcode() == ISD::ZERO_EXTEND ||
14985 N00.getOpcode() == ISD::ANY_EXTEND) {
14986 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14987 VT.getVectorElementType())
14988 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14989 N00.getOperand(0), N0.getOperand(1));
14990 }
14991 }
14992
14993 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14994 return NewVSel;
14995
14996 // Narrow a suitable binary operation with a non-opaque constant operand by
14997 // moving it ahead of the truncate. This is limited to pre-legalization
14998 // because targets may prefer a wider type during later combines and invert
14999 // this transform.
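// e.g. (trunc (add X:i32, 1000) to i16) -> (add (trunc X to i16), 1000); the
// constant is truncated for free and the narrower add is usually cheaper.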
15000 switch (N0.getOpcode()) {
15001 case ISD::ADD:
15002 case ISD::SUB:
15003 case ISD::MUL:
15004 case ISD::AND:
15005 case ISD::OR:
15006 case ISD::XOR:
15007 if (!LegalOperations && N0.hasOneUse() &&
15008 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15009 isConstantOrConstantVector(N0.getOperand(1), true))) {
15010 // TODO: We already restricted this to pre-legalization, but for vectors
15011 // we are extra cautious to not create an unsupported operation.
15012 // Target-specific changes are likely needed to avoid regressions here.
15013 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15014 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15015 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15016 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15017 }
15018 }
15019 break;
15020 case ISD::ADDE:
15021 case ISD::UADDO_CARRY:
15022 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15023 // (trunc uaddo_carry(X, Y, Carry)) ->
15024 // (uaddo_carry trunc(X), trunc(Y), Carry)
15025 // When the adde's carry is not used.
15026 // We only do this for uaddo_carry before operation legalization.
15027 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15028 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15029 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15030 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15031 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15032 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15033 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15034 }
15035 break;
15036 case ISD::USUBSAT:
15037 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15038 // enough to know that the upper bits are zero, we must ensure that we don't
15039 // introduce an extra truncate.
15040 if (!LegalOperations && N0.hasOneUse() &&
15043 VT.getScalarSizeInBits() &&
15044 hasOperation(N0.getOpcode(), VT)) {
15045 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15046 DAG, DL);
15047 }
15048 break;
15049 }
15050
15051 return SDValue();
15052}
15053
15054static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15055 SDValue Elt = N->getOperand(i);
15056 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15057 return Elt.getNode();
15058 return Elt.getOperand(Elt.getResNo()).getNode();
15059}
15060
15061/// build_pair (load, load) -> load
15062/// if load locations are consecutive.
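/// For example, a BUILD_PAIR of two i32 loads from consecutive addresses can
/// be replaced by a single i64 load when the wider access is legal and fast
/// for the target.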
15063SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15064 assert(N->getOpcode() == ISD::BUILD_PAIR);
15065
15066 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15067 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15068
15069 // A BUILD_PAIR always has the least significant part in elt 0 and the
15070 // most significant part in elt 1. So when combining into one large load, we
15071 // need to consider the endianness.
15072 if (DAG.getDataLayout().isBigEndian())
15073 std::swap(LD1, LD2);
15074
15075 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15076 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15077 LD1->getAddressSpace() != LD2->getAddressSpace())
15078 return SDValue();
15079
15080 unsigned LD1Fast = 0;
15081 EVT LD1VT = LD1->getValueType(0);
15082 unsigned LD1Bytes = LD1VT.getStoreSize();
15083 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15084 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15085 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15086 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15087 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15088 LD1->getPointerInfo(), LD1->getAlign());
15089
15090 return SDValue();
15091}
15092
15093static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15094 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15095 // and Lo parts; on big-endian machines it doesn't.
15096 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15097}
15098
15099SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15100 const TargetLowering &TLI) {
15101 // If this is not a bitcast to an FP type or if the target doesn't have
15102 // IEEE754-compliant FP logic, we're done.
15103 EVT VT = N->getValueType(0);
15104 SDValue N0 = N->getOperand(0);
15105 EVT SourceVT = N0.getValueType();
15106
15107 if (!VT.isFloatingPoint())
15108 return SDValue();
15109
15110 // TODO: Handle cases where the integer constant is a different scalar
15111 // bitwidth to the FP.
15112 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15113 return SDValue();
15114
15115 unsigned FPOpcode;
15116 APInt SignMask;
15117 switch (N0.getOpcode()) {
15118 case ISD::AND:
15119 FPOpcode = ISD::FABS;
15120 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15121 break;
15122 case ISD::XOR:
15123 FPOpcode = ISD::FNEG;
15124 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15125 break;
15126 case ISD::OR:
15127 FPOpcode = ISD::FABS;
15128 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15129 break;
15130 default:
15131 return SDValue();
15132 }
15133
15134 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15135 return SDValue();
15136
15137 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15138 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15139 // removing this would require more changes.
15140 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15141 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15142 return true;
15143
15144 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15145 };
15146
15147 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15148 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15149 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15150 // fneg (fabs X)
15151 SDValue LogicOp0 = N0.getOperand(0);
15152 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15153 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15154 IsBitCastOrFree(LogicOp0, VT)) {
15155 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15156 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15157 NumFPLogicOpsConv++;
15158 if (N0.getOpcode() == ISD::OR)
15159 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15160 return FPOp;
15161 }
15162
15163 return SDValue();
15164}
15165
15166SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15167 SDValue N0 = N->getOperand(0);
15168 EVT VT = N->getValueType(0);
15169
15170 if (N0.isUndef())
15171 return DAG.getUNDEF(VT);
15172
15173 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15174 // Only do this before legalize types, unless both types are integer and the
15175 // scalar type is legal. Only do this before legalize ops, since the target
15176 // may depend on the bitcast.
15177 // First check to see if this is all constant.
15178 // TODO: Support FP bitcasts after legalize types.
15179 if (VT.isVector() &&
15180 (!LegalTypes ||
15181 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15182 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15183 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15184 cast<BuildVectorSDNode>(N0)->isConstant())
15185 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15186 VT.getVectorElementType());
15187
15188 // If the input is a constant, let getNode fold it.
15189 if (isIntOrFPConstant(N0)) {
15190 // If we can't allow illegal operations, we need to check that this is just
15191 // an fp -> int or int -> fp conversion and that the resulting operation will
15192 // be legal.
15193 if (!LegalOperations ||
15194 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15195 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15196 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15197 TLI.isOperationLegal(ISD::Constant, VT))) {
15198 SDValue C = DAG.getBitcast(VT, N0);
15199 if (C.getNode() != N)
15200 return C;
15201 }
15202 }
15203
15204 // (conv (conv x, t1), t2) -> (conv x, t2)
15205 if (N0.getOpcode() == ISD::BITCAST)
15206 return DAG.getBitcast(VT, N0.getOperand(0));
15207
15208 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15209 // iff the current bitwise logicop type isn't legal
15210 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15211 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15212 auto IsFreeBitcast = [VT](SDValue V) {
15213 return (V.getOpcode() == ISD::BITCAST &&
15214 V.getOperand(0).getValueType() == VT) ||
15215 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15216 V->hasOneUse());
15217 };
15218 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15219 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15220 DAG.getBitcast(VT, N0.getOperand(0)),
15221 DAG.getBitcast(VT, N0.getOperand(1)));
15222 }
15223
15224 // fold (conv (load x)) -> (load (conv*)x)
15225 // If the resultant load doesn't need a higher alignment than the original!
15226 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15227 // Do not remove the cast if the types differ in endian layout.
15228 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15229 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15230 // If the load is volatile, we only want to change the load type if the
15231 // resulting load is legal. Otherwise we might increase the number of
15232 // memory accesses. We don't care if the original type was legal or not
15233 // as we assume software couldn't rely on the number of accesses of an
15234 // illegal type.
15235 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15236 TLI.isOperationLegal(ISD::LOAD, VT))) {
15237 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15238
15239 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15240 *LN0->getMemOperand())) {
15241 SDValue Load =
15242 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15243 LN0->getMemOperand());
15244 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15245 return Load;
15246 }
15247 }
15248
15249 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15250 return V;
15251
15252 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15253 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15254 //
15255 // For ppc_fp128:
15256 // fold (bitcast (fneg x)) ->
15257 // flipbit = signbit
15258 // (xor (bitcast x) (build_pair flipbit, flipbit))
15259 //
15260 // fold (bitcast (fabs x)) ->
15261 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15262 // (xor (bitcast x) (build_pair flipbit, flipbit))
15263 // This often reduces constant pool loads.
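// Concrete example for a scalar f64: (bitcast (fneg x)) becomes
// (xor (bitcast x), 0x8000000000000000) and (bitcast (fabs x)) becomes
// (and (bitcast x), 0x7FFFFFFFFFFFFFFF), since IEEE-754 keeps the sign in
// the top bit.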
15264 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15265 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15266 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15267 !N0.getValueType().isVector()) {
15268 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15269 AddToWorklist(NewConv.getNode());
15270
15271 SDLoc DL(N);
15272 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15273 assert(VT.getSizeInBits() == 128);
15274 SDValue SignBit = DAG.getConstant(
15275 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15276 SDValue FlipBit;
15277 if (N0.getOpcode() == ISD::FNEG) {
15278 FlipBit = SignBit;
15279 AddToWorklist(FlipBit.getNode());
15280 } else {
15281 assert(N0.getOpcode() == ISD::FABS);
15282 SDValue Hi =
15283 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15284 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15285 SDLoc(NewConv)));
15286 AddToWorklist(Hi.getNode());
15287 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15288 AddToWorklist(FlipBit.getNode());
15289 }
15290 SDValue FlipBits =
15291 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15292 AddToWorklist(FlipBits.getNode());
15293 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15294 }
15295 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15296 if (N0.getOpcode() == ISD::FNEG)
15297 return DAG.getNode(ISD::XOR, DL, VT,
15298 NewConv, DAG.getConstant(SignBit, DL, VT));
15299 assert(N0.getOpcode() == ISD::FABS);
15300 return DAG.getNode(ISD::AND, DL, VT,
15301 NewConv, DAG.getConstant(~SignBit, DL, VT));
15302 }
15303
15304 // fold (bitconvert (fcopysign cst, x)) ->
15305 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15306 // Note that we don't handle (copysign x, cst) because this can always be
15307 // folded to an fneg or fabs.
15308 //
15309 // For ppc_fp128:
15310 // fold (bitcast (fcopysign cst, x)) ->
15311 // flipbit = (and (extract_element
15312 // (xor (bitcast cst), (bitcast x)), 0),
15313 // signbit)
15314 // (xor (bitcast cst) (build_pair flipbit, flipbit))
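// Concrete example for a scalar f32: (bitcast (fcopysign 1.0, x)) becomes
// (or (and (bitcast x), 0x80000000), 0x3F800000), i.e. the sign bit comes
// from x and the remaining bits come from the constant 1.0.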
15315 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15316 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15317 !VT.isVector()) {
15318 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15319 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15320 if (isTypeLegal(IntXVT)) {
15321 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15322 AddToWorklist(X.getNode());
15323
15324 // If X has a different width than the result/lhs, sext it or truncate it.
15325 unsigned VTWidth = VT.getSizeInBits();
15326 if (OrigXWidth < VTWidth) {
15327 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15328 AddToWorklist(X.getNode());
15329 } else if (OrigXWidth > VTWidth) {
15330 // To get the sign bit in the right place, we have to shift it right
15331 // before truncating.
15332 SDLoc DL(X);
15333 X = DAG.getNode(ISD::SRL, DL,
15334 X.getValueType(), X,
15335 DAG.getConstant(OrigXWidth-VTWidth, DL,
15336 X.getValueType()));
15337 AddToWorklist(X.getNode());
15338 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15339 AddToWorklist(X.getNode());
15340 }
15341
15342 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15343 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15344 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15345 AddToWorklist(Cst.getNode());
15346 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15347 AddToWorklist(X.getNode());
15348 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15349 AddToWorklist(XorResult.getNode());
15350 SDValue XorResult64 = DAG.getNode(
15351 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15352 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15353 SDLoc(XorResult)));
15354 AddToWorklist(XorResult64.getNode());
15355 SDValue FlipBit =
15356 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15357 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15358 AddToWorklist(FlipBit.getNode());
15359 SDValue FlipBits =
15360 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15361 AddToWorklist(FlipBits.getNode());
15362 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15363 }
15364 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15365 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15366 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15367 AddToWorklist(X.getNode());
15368
15369 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15370 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15371 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15372 AddToWorklist(Cst.getNode());
15373
15374 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15375 }
15376 }
15377
15378 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15379 if (N0.getOpcode() == ISD::BUILD_PAIR)
15380 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15381 return CombineLD;
15382
15383 // Remove double bitcasts from shuffles - this is often a legacy of
15384 // XformToShuffleWithZero being used to combine bitmaskings (of
15385 // float vectors bitcast to integer vectors) into shuffles.
15386 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
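// For example, bitcasting a v2i64 shuffle with mask <1,0> to v4i32 can be
// rewritten as a v4i32 shuffle of the original (pre-bitcast) operands with
// each mask index scaled by MaskScale = 2, i.e. <2,3,0,1>, removing both
// bitcasts.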
15387 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15388 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15389 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15390 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15391 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15392
15393 // If operands are a bitcast, peek through if it casts the original VT.
15394 // If operands are a constant, just bitcast back to original VT.
15395 auto PeekThroughBitcast = [&](SDValue Op) {
15396 if (Op.getOpcode() == ISD::BITCAST &&
15397 Op.getOperand(0).getValueType() == VT)
15398 return SDValue(Op.getOperand(0));
15399 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15400 return DAG.getBitcast(VT, Op);
15401 return SDValue();
15402 };
15403
15404 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15405 // the result type of this bitcast. This would eliminate at least one
15406 // bitcast. See the transform in InstCombine.
15407 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15408 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15409 if (!(SV0 && SV1))
15410 return SDValue();
15411
15412 int MaskScale =
15413 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15414 SmallVector<int, 8> NewMask;
15415 for (int M : SVN->getMask())
15416 for (int i = 0; i != MaskScale; ++i)
15417 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15418
15419 SDValue LegalShuffle =
15420 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15421 if (LegalShuffle)
15422 return LegalShuffle;
15423 }
15424
15425 return SDValue();
15426}
15427
15428SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15429 EVT VT = N->getValueType(0);
15430 return CombineConsecutiveLoads(N, VT);
15431}
15432
15433SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15434 SDValue N0 = N->getOperand(0);
15435
15436 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15437 return N0;
15438
15439 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15440 // Try to push freeze through instructions that propagate but don't produce
15441 // poison as far as possible. If the freeze's operand satisfies three
15442 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all but
15443 // one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR or
15444 // similar), then push the freeze through to the maybe-poison operands.
15445 // NOTE: we will strip poison-generating flags, so ignore them here.
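// For example, freeze (or x, 42) can become or (freeze x), 42: OR never
// creates poison on its own, the constant operand is trivially non-poison,
// and only the single maybe-poison operand x needs to be frozen.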
15446 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15447 /*ConsiderFlags*/ false) ||
15448 N0->getNumValues() != 1 || !N0->hasOneUse())
15449 return SDValue();
15450
15451 bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15452 N0.getOpcode() == ISD::BUILD_PAIR ||
15453 N0.getOpcode() == ISD::CONCAT_VECTORS;
15454
15455 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15456 for (SDValue Op : N0->ops()) {
15457 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15458 /*Depth*/ 1))
15459 continue;
15460 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15461 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15462 if (!HadMaybePoisonOperands)
15463 continue;
15464 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15465 // Multiple maybe-poison ops when not allowed - bail out.
15466 return SDValue();
15467 }
15468 }
15469 // NOTE: the whole op may still not be guaranteed non-undef/non-poison because
15470 // it could create undef or poison due to its poison-generating flags.
15471 // So not finding any maybe-poison operands is fine.
15472
15473 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15474 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15475 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15476 continue;
15477 // First, freeze each offending operand.
15478 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15479 // Then, change all other uses of unfrozen operand to use frozen operand.
15480 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15481 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15482 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15483 // But, that also updated the use in the freeze we just created, thus
15484 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15485 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15486 MaybePoisonOperand);
15487 }
15488 }
15489
15490 // This node has been merged with another.
15491 if (N->getOpcode() == ISD::DELETED_NODE)
15492 return SDValue(N, 0);
15493
15494 // The whole node may have been updated, so the value we were holding
15495 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15496 N0 = N->getOperand(0);
15497
15498 // Finally, recreate the node; its operands were updated to use frozen
15499 // operands, so we just need to use its "original" operands.
15500 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15501 // Special-handle ISD::UNDEF: each one of them can be its own thing.
15502 for (SDValue &Op : Ops) {
15503 if (Op.getOpcode() == ISD::UNDEF)
15504 Op = DAG.getFreeze(Op);
15505 }
15506 // NOTE: this strips poison generating flags.
15507 SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15508 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15509 "Can't create node that may be undef/poison!");
15510 return R;
15511}
15512
15513/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15514/// operands. DstEltVT indicates the destination element value type.
15515SDValue DAGCombiner::
15516ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15517 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15518
15519 // If this is already the right type, we're done.
15520 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15521
15522 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15523 unsigned DstBitSize = DstEltVT.getSizeInBits();
15524
15525 // If this is a conversion of N elements of one type to N elements of another
15526 // type, convert each element. This handles FP<->INT cases.
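// For example, bitcasting a constant v4i32 BUILD_VECTOR to v4f32 simply
// bitcasts each 32-bit element to a float constant and rebuilds the vector.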
15527 if (SrcBitSize == DstBitSize) {
15528 SmallVector<SDValue, 8> Ops;
15529 for (SDValue Op : BV->op_values()) {
15530 // If the vector element type is not legal, the BUILD_VECTOR operands
15531 // are promoted and implicitly truncated. Make that explicit here.
15532 if (Op.getValueType() != SrcEltVT)
15533 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15534 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15535 AddToWorklist(Ops.back().getNode());
15536 }
15537 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15538 BV->getValueType(0).getVectorNumElements());
15539 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15540 }
15541
15542 // Otherwise, we're growing or shrinking the elements. To avoid having to
15543 // handle annoying details of growing/shrinking FP values, we convert them to
15544 // int first.
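// For example, bitcasting a constant v4f16 BUILD_VECTOR to v2f32 first
// converts the elements to v4i16, then packs pairs of 16-bit values into i32
// raw bits (respecting endianness), and finally bitcasts each i32 element to
// f32.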
15545 if (SrcEltVT.isFloatingPoint()) {
15546 // Convert the input float vector to an int vector whose elements have the
15547 // same size.
15548 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15549 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15550 SrcEltVT = IntVT;
15551 }
15552
15553 // Now we know the input is an integer vector. If the output is an FP type,
15554 // convert to integer first, then to FP of the right size.
15555 if (DstEltVT.isFloatingPoint()) {
15556 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15557 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15558
15559 // Next, convert to FP elements of the same size.
15560 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15561 }
15562
15563 // Okay, we know the src/dst types are both integer types of differing widths.
15564 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15565
15566 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15567 // BuildVectorSDNode?
15568 auto *BVN = cast<BuildVectorSDNode>(BV);
15569
15570 // Extract the constant raw bit data.
15571 BitVector UndefElements;
15572 SmallVector<APInt> RawBits;
15573 bool IsLE = DAG.getDataLayout().isLittleEndian();
15574 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15575 return SDValue();
15576
15577 SDLoc DL(BV);
15578 SmallVector<SDValue, 8> Ops;
15579 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15580 if (UndefElements[I])
15581 Ops.push_back(DAG.getUNDEF(DstEltVT));
15582 else
15583 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15584 }
15585
15586 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15587 return DAG.getBuildVector(VT, DL, Ops);
15588}
15589
15590// Returns true if floating point contraction is allowed on the FMUL-SDValue
15591// `N`
15592 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15593 assert(N.getOpcode() == ISD::FMUL);
15594
15595 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15596 N->getFlags().hasAllowContract();
15597}
15598
15599// Returns true if `N` can assume no infinities involved in its computation.
15600 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15601 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15602}
15603
15604/// Try to perform FMA combining on a given FADD node.
15605template <class MatchContextClass>
15606SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15607 SDValue N0 = N->getOperand(0);
15608 SDValue N1 = N->getOperand(1);
15609 EVT VT = N->getValueType(0);
15610 SDLoc SL(N);
15611 MatchContextClass matcher(DAG, TLI, N);
15612 const TargetOptions &Options = DAG.getTarget().Options;
15613
15614 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15615
15616 // Floating-point multiply-add with intermediate rounding.
15617 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15618 // FIXME: Add VP_FMAD opcode.
15619 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15620
15621 // Floating-point multiply-add without intermediate rounding.
15622 bool HasFMA =
15623 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15624 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15625
15626 // No valid opcode, do not combine.
15627 if (!HasFMAD && !HasFMA)
15628 return SDValue();
15629
15630 bool CanReassociate =
15631 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15632 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15633 Options.UnsafeFPMath || HasFMAD);
15634 // If the addition is not contractable, do not combine.
15635 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15636 return SDValue();
15637
15638 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15639 // beneficial. It does not reduce latency. It increases register pressure. It
15640 // replaces an fadd with an fma which is a more complex instruction, so is
15641 // likely to have a larger encoding, use more functional units, etc.
15642 if (N0 == N1)
15643 return SDValue();
15644
15645 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15646 return SDValue();
15647
15648 // Always prefer FMAD to FMA for precision.
15649 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15650 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15651
15652 auto isFusedOp = [&](SDValue N) {
15653 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15654 };
15655
15656 // Is the node an FMUL and contractable either due to global flags or
15657 // SDNodeFlags.
15658 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15659 if (!matcher.match(N, ISD::FMUL))
15660 return false;
15661 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15662 };
15663 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15664 // prefer to fold the multiply with fewer uses.
15665 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
15666 if (N0->use_size() > N1->use_size())
15667 std::swap(N0, N1);
15668 }
15669
15670 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15671 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15672 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15673 N0.getOperand(1), N1);
15674 }
15675
15676 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15677 // Note: Commutes FADD operands.
15678 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15679 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15680 N1.getOperand(1), N0);
15681 }
15682
15683 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15684 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15685 // This also works with nested fma instructions:
15686 // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
15687 // fma A, B, (fma C, D, (fma E, F, G))
15688 // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
15689 // fma A, B, (fma C, D, (fma E, F, G)).
15690 // This requires reassociation because it changes the order of operations.
15691 if (CanReassociate) {
15692 SDValue FMA, E;
15693 if (isFusedOp(N0) && N0.hasOneUse()) {
15694 FMA = N0;
15695 E = N1;
15696 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15697 FMA = N1;
15698 E = N0;
15699 }
15700
15701 SDValue TmpFMA = FMA;
15702 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15703 SDValue FMul = TmpFMA->getOperand(2);
15704 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15705 SDValue C = FMul.getOperand(0);
15706 SDValue D = FMul.getOperand(1);
15707 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15708 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15709 // Replacing the inner FMul could cause the outer FMA to be simplified
15710 // away.
15711 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15712 }
15713
15714 TmpFMA = TmpFMA->getOperand(2);
15715 }
15716 }
15717
15718 // Look through FP_EXTEND nodes to do more combining.
15719
15720 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15721 if (matcher.match(N0, ISD::FP_EXTEND)) {
15722 SDValue N00 = N0.getOperand(0);
15723 if (isContractableFMUL(N00) &&
15724 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15725 N00.getValueType())) {
15726 return matcher.getNode(
15727 PreferredFusedOpcode, SL, VT,
15728 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15729 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15730 }
15731 }
15732
15733 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15734 // Note: Commutes FADD operands.
15735 if (matcher.match(N1, ISD::FP_EXTEND)) {
15736 SDValue N10 = N1.getOperand(0);
15737 if (isContractableFMUL(N10) &&
15738 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15739 N10.getValueType())) {
15740 return matcher.getNode(
15741 PreferredFusedOpcode, SL, VT,
15742 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15743 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15744 }
15745 }
15746
15747 // More folding opportunities when target permits.
15748 if (Aggressive) {
15749 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15750 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15751 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15752 SDValue Z) {
15753 return matcher.getNode(
15754 PreferredFusedOpcode, SL, VT, X, Y,
15755 matcher.getNode(PreferredFusedOpcode, SL, VT,
15756 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15757 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15758 };
15759 if (isFusedOp(N0)) {
15760 SDValue N02 = N0.getOperand(2);
15761 if (matcher.match(N02, ISD::FP_EXTEND)) {
15762 SDValue N020 = N02.getOperand(0);
15763 if (isContractableFMUL(N020) &&
15764 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15765 N020.getValueType())) {
15766 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15767 N020.getOperand(0), N020.getOperand(1),
15768 N1);
15769 }
15770 }
15771 }
15772
15773 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15774 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15775 // FIXME: This turns two single-precision and one double-precision
15776 // operation into two double-precision operations, which might not be
15777 // interesting for all targets, especially GPUs.
15778 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15779 SDValue Z) {
15780 return matcher.getNode(
15781 PreferredFusedOpcode, SL, VT,
15782 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15783 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15784 matcher.getNode(PreferredFusedOpcode, SL, VT,
15785 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15786 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15787 };
15788 if (N0.getOpcode() == ISD::FP_EXTEND) {
15789 SDValue N00 = N0.getOperand(0);
15790 if (isFusedOp(N00)) {
15791 SDValue N002 = N00.getOperand(2);
15792 if (isContractableFMUL(N002) &&
15793 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15794 N00.getValueType())) {
15795 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15796 N002.getOperand(0), N002.getOperand(1),
15797 N1);
15798 }
15799 }
15800 }
15801
15802 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
15803 // -> (fma y, z, (fma (fpext u), (fpext v), x))
15804 if (isFusedOp(N1)) {
15805 SDValue N12 = N1.getOperand(2);
15806 if (N12.getOpcode() == ISD::FP_EXTEND) {
15807 SDValue N120 = N12.getOperand(0);
15808 if (isContractableFMUL(N120) &&
15809 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15810 N120.getValueType())) {
15811 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15812 N120.getOperand(0), N120.getOperand(1),
15813 N0);
15814 }
15815 }
15816 }
15817
15818 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
15819 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15820 // FIXME: This turns two single-precision and one double-precision
15821 // operation into two double-precision operations, which might not be
15822 // interesting for all targets, especially GPUs.
15823 if (N1.getOpcode() == ISD::FP_EXTEND) {
15824 SDValue N10 = N1.getOperand(0);
15825 if (isFusedOp(N10)) {
15826 SDValue N102 = N10.getOperand(2);
15827 if (isContractableFMUL(N102) &&
15828 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15829 N10.getValueType())) {
15830 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15831 N102.getOperand(0), N102.getOperand(1),
15832 N0);
15833 }
15834 }
15835 }
15836 }
15837
15838 return SDValue();
15839}
15840
15841/// Try to perform FMA combining on a given FSUB node.
15842template <class MatchContextClass>
15843SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15844 SDValue N0 = N->getOperand(0);
15845 SDValue N1 = N->getOperand(1);
15846 EVT VT = N->getValueType(0);
15847 SDLoc SL(N);
15848 MatchContextClass matcher(DAG, TLI, N);
15849 const TargetOptions &Options = DAG.getTarget().Options;
15850
15851 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15852
15853 // Floating-point multiply-add with intermediate rounding.
15854 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15855 // FIXME: Add VP_FMAD opcode.
15856 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15857
15858 // Floating-point multiply-add without intermediate rounding.
15859 bool HasFMA =
15860 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15861 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15862
15863 // No valid opcode, do not combine.
15864 if (!HasFMAD && !HasFMA)
15865 return SDValue();
15866
15867 const SDNodeFlags Flags = N->getFlags();
15868 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15869 Options.UnsafeFPMath || HasFMAD);
15870
15871 // If the subtraction is not contractable, do not combine.
15872 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15873 return SDValue();
15874
15875 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15876 return SDValue();
15877
15878 // Always prefer FMAD to FMA for precision.
15879 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15880 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15881 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15882
15883 // Is the node an FMUL and contractable either due to global flags or
15884 // SDNodeFlags.
15885 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15886 if (!matcher.match(N, ISD::FMUL))
15887 return false;
15888 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15889 };
15890
15891 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15892 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15893 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15894 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15895 XY.getOperand(1),
15896 matcher.getNode(ISD::FNEG, SL, VT, Z));
15897 }
15898 return SDValue();
15899 };
15900
15901 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15902 // Note: Commutes FSUB operands.
15903 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15904 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15905 return matcher.getNode(
15906 PreferredFusedOpcode, SL, VT,
15907 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15908 YZ.getOperand(1), X);
15909 }
15910 return SDValue();
15911 };
15912
15913 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15914 // prefer to fold the multiply with fewer uses.
15915 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15916 (N0->use_size() > N1->use_size())) {
15917 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15918 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15919 return V;
15920 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15921 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15922 return V;
15923 } else {
15924 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15925 if (SDValue V = tryToFoldXYSubZ(N0, N1))
15926 return V;
15927 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15928 if (SDValue V = tryToFoldXSubYZ(N0, N1))
15929 return V;
15930 }
15931
15932 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
15933 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15934 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15935 SDValue N00 = N0.getOperand(0).getOperand(0);
15936 SDValue N01 = N0.getOperand(0).getOperand(1);
15937 return matcher.getNode(PreferredFusedOpcode, SL, VT,
15938 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15939 matcher.getNode(ISD::FNEG, SL, VT, N1));
15940 }
15941
15942 // Look through FP_EXTEND nodes to do more combining.
15943
15944 // fold (fsub (fpext (fmul x, y)), z)
15945 // -> (fma (fpext x), (fpext y), (fneg z))
15946 if (matcher.match(N0, ISD::FP_EXTEND)) {
15947 SDValue N00 = N0.getOperand(0);
15948 if (isContractableFMUL(N00) &&
15949 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15950 N00.getValueType())) {
15951 return matcher.getNode(
15952 PreferredFusedOpcode, SL, VT,
15953 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15954 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15955 matcher.getNode(ISD::FNEG, SL, VT, N1));
15956 }
15957 }
15958
15959 // fold (fsub x, (fpext (fmul y, z)))
15960 // -> (fma (fneg (fpext y)), (fpext z), x)
15961 // Note: Commutes FSUB operands.
15962 if (matcher.match(N1, ISD::FP_EXTEND)) {
15963 SDValue N10 = N1.getOperand(0);
15964 if (isContractableFMUL(N10) &&
15965 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15966 N10.getValueType())) {
15967 return matcher.getNode(
15968 PreferredFusedOpcode, SL, VT,
15969 matcher.getNode(
15970 ISD::FNEG, SL, VT,
15971 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
15972 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15973 }
15974 }
15975
15976 // fold (fsub (fpext (fneg (fmul x, y))), z)
15977 // -> (fneg (fma (fpext x), (fpext y), z))
15978 // Note: This could be removed with appropriate canonicalization of the
15979 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
15980 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
15981 // from implementing the canonicalization in visitFSUB.
15982 if (matcher.match(N0, ISD::FP_EXTEND)) {
15983 SDValue N00 = N0.getOperand(0);
15984 if (matcher.match(N00, ISD::FNEG)) {
15985 SDValue N000 = N00.getOperand(0);
15986 if (isContractableFMUL(N000) &&
15987 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15988 N00.getValueType())) {
15989 return matcher.getNode(
15990 ISD::FNEG, SL, VT,
15991 matcher.getNode(
15992 PreferredFusedOpcode, SL, VT,
15993 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15994 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15995 N1));
15996 }
15997 }
15998 }
15999
16000 // fold (fsub (fneg (fpext (fmul x, y))), z)
16001 // -> (fneg (fma (fpext x), (fpext y), z))
16002 // Note: This could be removed with appropriate canonicalization of the
16003 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
16004 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
16005 // from implementing the canonicalization in visitFSUB.
16006 if (matcher.match(N0, ISD::FNEG)) {
16007 SDValue N00 = N0.getOperand(0);
16008 if (matcher.match(N00, ISD::FP_EXTEND)) {
16009 SDValue N000 = N00.getOperand(0);
16010 if (isContractableFMUL(N000) &&
16011 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16012 N000.getValueType())) {
16013 return matcher.getNode(
16014 ISD::FNEG, SL, VT,
16015 matcher.getNode(
16016 PreferredFusedOpcode, SL, VT,
16017 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16018 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16019 N1));
16020 }
16021 }
16022 }
16023
16024 auto isReassociable = [&Options](SDNode *N) {
16025 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16026 };
16027
16028 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16029 &isReassociable](SDValue N) {
16030 return isContractableFMUL(N) && isReassociable(N.getNode());
16031 };
16032
16033 auto isFusedOp = [&](SDValue N) {
16034 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16035 };
16036
16037 // More folding opportunities when target permits.
16038 if (Aggressive && isReassociable(N)) {
16039 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16040 // fold (fsub (fma x, y, (fmul u, v)), z)
16041 // -> (fma x, y (fma u, v, (fneg z)))
16042 if (CanFuse && isFusedOp(N0) &&
16043 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16044 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16045 return matcher.getNode(
16046 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16047 matcher.getNode(PreferredFusedOpcode, SL, VT,
16048 N0.getOperand(2).getOperand(0),
16049 N0.getOperand(2).getOperand(1),
16050 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16051 }
16052
16053 // fold (fsub x, (fma y, z, (fmul u, v)))
16054 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16055 if (CanFuse && isFusedOp(N1) &&
16056 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16057 N1->hasOneUse() && NoSignedZero) {
16058 SDValue N20 = N1.getOperand(2).getOperand(0);
16059 SDValue N21 = N1.getOperand(2).getOperand(1);
16060 return matcher.getNode(
16061 PreferredFusedOpcode, SL, VT,
16062 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16063 N1.getOperand(1),
16064 matcher.getNode(PreferredFusedOpcode, SL, VT,
16065 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16066 }
16067
16068 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16069 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16070 if (isFusedOp(N0) && N0->hasOneUse()) {
16071 SDValue N02 = N0.getOperand(2);
16072 if (matcher.match(N02, ISD::FP_EXTEND)) {
16073 SDValue N020 = N02.getOperand(0);
16074 if (isContractableAndReassociableFMUL(N020) &&
16075 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16076 N020.getValueType())) {
16077 return matcher.getNode(
16078 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16079 matcher.getNode(
16080 PreferredFusedOpcode, SL, VT,
16081 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16082 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16083 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16084 }
16085 }
16086 }
16087
16088 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16089 // -> (fma (fpext x), (fpext y),
16090 // (fma (fpext u), (fpext v), (fneg z)))
16091 // FIXME: This turns two single-precision and one double-precision
16092 // operation into two double-precision operations, which might not be
16093 // interesting for all targets, especially GPUs.
16094 if (matcher.match(N0, ISD::FP_EXTEND)) {
16095 SDValue N00 = N0.getOperand(0);
16096 if (isFusedOp(N00)) {
16097 SDValue N002 = N00.getOperand(2);
16098 if (isContractableAndReassociableFMUL(N002) &&
16099 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16100 N00.getValueType())) {
16101 return matcher.getNode(
16102 PreferredFusedOpcode, SL, VT,
16103 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16104 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16105 matcher.getNode(
16106 PreferredFusedOpcode, SL, VT,
16107 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16108 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16109 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16110 }
16111 }
16112 }
16113
16114 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16115 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16116 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16117 N1->hasOneUse()) {
16118 SDValue N120 = N1.getOperand(2).getOperand(0);
16119 if (isContractableAndReassociableFMUL(N120) &&
16120 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16121 N120.getValueType())) {
16122 SDValue N1200 = N120.getOperand(0);
16123 SDValue N1201 = N120.getOperand(1);
16124 return matcher.getNode(
16125 PreferredFusedOpcode, SL, VT,
16126 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16127 N1.getOperand(1),
16128 matcher.getNode(
16129 PreferredFusedOpcode, SL, VT,
16130 matcher.getNode(ISD::FNEG, SL, VT,
16131 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16132 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16133 }
16134 }
16135
16136 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16137 // -> (fma (fneg (fpext y)), (fpext z),
16138 // (fma (fneg (fpext u)), (fpext v), x))
16139 // FIXME: This turns two single-precision and one double-precision
16140 // operation into two double-precision operations, which might not be
16141 // interesting for all targets, especially GPUs.
16142 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16143 SDValue CvtSrc = N1.getOperand(0);
16144 SDValue N100 = CvtSrc.getOperand(0);
16145 SDValue N101 = CvtSrc.getOperand(1);
16146 SDValue N102 = CvtSrc.getOperand(2);
16147 if (isContractableAndReassociableFMUL(N102) &&
16148 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16149 CvtSrc.getValueType())) {
16150 SDValue N1020 = N102.getOperand(0);
16151 SDValue N1021 = N102.getOperand(1);
16152 return matcher.getNode(
16153 PreferredFusedOpcode, SL, VT,
16154 matcher.getNode(ISD::FNEG, SL, VT,
16155 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16156 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16157 matcher.getNode(
16158 PreferredFusedOpcode, SL, VT,
16159 matcher.getNode(ISD::FNEG, SL, VT,
16160 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16161 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16162 }
16163 }
16164 }
16165
16166 return SDValue();
16167}
16168
16169/// Try to perform FMA combining on a given FMUL node based on the distributive
16170/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16171/// subtraction instead of addition).
16172SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16173 SDValue N0 = N->getOperand(0);
16174 SDValue N1 = N->getOperand(1);
16175 EVT VT = N->getValueType(0);
16176 SDLoc SL(N);
16177
16178 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16179
16180 const TargetOptions &Options = DAG.getTarget().Options;
16181
16182 // The transforms below are incorrect when x == 0 and y == inf, because the
16183 // intermediate multiplication produces a nan.
16184 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16185 if (!hasNoInfs(Options, FAdd))
16186 return SDValue();
16187
16188 // Floating-point multiply-add without intermediate rounding.
16189 bool HasFMA =
16190 isContractableFMUL(Options, SDValue(N, 0)) &&
16191 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16192 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16193
16194 // Floating-point multiply-add with intermediate rounding. This can result
16195 // in a less precise result due to the changed rounding order.
16196 bool HasFMAD = Options.UnsafeFPMath &&
16197 (LegalOperations && TLI.isFMADLegal(DAG, N));
16198
16199 // No valid opcode, do not combine.
16200 if (!HasFMAD && !HasFMA)
16201 return SDValue();
16202
16203 // Always prefer FMAD to FMA for precision.
16204 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16205 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16206
16207 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16208 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16209 auto FuseFADD = [&](SDValue X, SDValue Y) {
16210 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16211 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16212 if (C->isExactlyValue(+1.0))
16213 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16214 Y);
16215 if (C->isExactlyValue(-1.0))
16216 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16217 DAG.getNode(ISD::FNEG, SL, VT, Y));
16218 }
16219 }
16220 return SDValue();
16221 };
16222
16223 if (SDValue FMA = FuseFADD(N0, N1))
16224 return FMA;
16225 if (SDValue FMA = FuseFADD(N1, N0))
16226 return FMA;
16227
16228 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16229 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16230 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16231 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16232 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16233 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16234 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16235 if (C0->isExactlyValue(+1.0))
16236 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16237 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16238 Y);
16239 if (C0->isExactlyValue(-1.0))
16240 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16241 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16242 DAG.getNode(ISD::FNEG, SL, VT, Y));
16243 }
16244 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16245 if (C1->isExactlyValue(+1.0))
16246 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16247 DAG.getNode(ISD::FNEG, SL, VT, Y));
16248 if (C1->isExactlyValue(-1.0))
16249 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16250 Y);
16251 }
16252 }
16253 return SDValue();
16254 };
16255
16256 if (SDValue FMA = FuseFSUB(N0, N1))
16257 return FMA;
16258 if (SDValue FMA = FuseFSUB(N1, N0))
16259 return FMA;
16260
16261 return SDValue();
16262}
16263
16264SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16265 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16266
16267 // FADD -> FMA combines:
16268 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16269 if (Fused.getOpcode() != ISD::DELETED_NODE)
16270 AddToWorklist(Fused.getNode());
16271 return Fused;
16272 }
16273 return SDValue();
16274}
16275
16276SDValue DAGCombiner::visitFADD(SDNode *N) {
16277 SDValue N0 = N->getOperand(0);
16278 SDValue N1 = N->getOperand(1);
16279 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16280 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16281 EVT VT = N->getValueType(0);
16282 SDLoc DL(N);
16283 const TargetOptions &Options = DAG.getTarget().Options;
16284 SDNodeFlags Flags = N->getFlags();
16285 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16286
16287 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16288 return R;
16289
16290 // fold (fadd c1, c2) -> c1 + c2
16291 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16292 return C;
16293
16294 // canonicalize constant to RHS
16295 if (N0CFP && !N1CFP)
16296 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16297
16298 // fold vector ops
16299 if (VT.isVector())
16300 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16301 return FoldedVOp;
16302
16303 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16304 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16305 if (N1C && N1C->isZero())
16306 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16307 return N0;
16308
16309 if (SDValue NewSel = foldBinOpIntoSelect(N))
16310 return NewSel;
16311
16312 // fold (fadd A, (fneg B)) -> (fsub A, B)
16313 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16314 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16315 N1, DAG, LegalOperations, ForCodeSize))
16316 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16317
16318 // fold (fadd (fneg A), B) -> (fsub B, A)
16319 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16320 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16321 N0, DAG, LegalOperations, ForCodeSize))
16322 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16323
16324 auto isFMulNegTwo = [](SDValue FMul) {
16325 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16326 return false;
16327 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16328 return C && C->isExactlyValue(-2.0);
16329 };
16330
16331 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16332 if (isFMulNegTwo(N0)) {
16333 SDValue B = N0.getOperand(0);
16334 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16335 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16336 }
16337 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16338 if (isFMulNegTwo(N1)) {
16339 SDValue B = N1.getOperand(0);
16340 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16341 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16342 }
16343
16344 // No FP constant should be created after legalization as Instruction
16345 // Selection pass has a hard time dealing with FP constants.
16346 bool AllowNewConst = (Level < AfterLegalizeDAG);
16347
16348 // If nnan is enabled, fold lots of things.
16349 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16350 // If allowed, fold (fadd (fneg x), x) -> 0.0
16351 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16352 return DAG.getConstantFP(0.0, DL, VT);
16353
16354 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16355 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16356 return DAG.getConstantFP(0.0, DL, VT);
16357 }
16358
16359 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16360 // TODO: break out portions of the transformations below for which Unsafe is
16361 // considered and which do not require both nsz and reassoc
16362 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16363 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16364 AllowNewConst) {
16365 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16366 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16367 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16368 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16369 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16370 }
16371
16372 // We can fold chains of FADD's of the same value into multiplications.
16373 // This transform is not safe in general because we are reducing the number
16374 // of rounding steps.
16375 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16376 if (N0.getOpcode() == ISD::FMUL) {
16377 SDNode *CFP00 =
16378 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16379 SDNode *CFP01 =
16380 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16381
16382 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16383 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16384 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16385 DAG.getConstantFP(1.0, DL, VT));
16386 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16387 }
16388
16389 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16390 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16391 N1.getOperand(0) == N1.getOperand(1) &&
16392 N0.getOperand(0) == N1.getOperand(0)) {
16393 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16394 DAG.getConstantFP(2.0, DL, VT));
16395 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16396 }
16397 }
16398
16399 if (N1.getOpcode() == ISD::FMUL) {
16400 SDNode *CFP10 =
16401 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16402 SDNode *CFP11 =
16403 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16404
16405 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16406 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16407 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16408 DAG.getConstantFP(1.0, DL, VT));
16409 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16410 }
16411
16412 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16413 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16414 N0.getOperand(0) == N0.getOperand(1) &&
16415 N1.getOperand(0) == N0.getOperand(0)) {
16416 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16417 DAG.getConstantFP(2.0, DL, VT));
16418 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16419 }
16420 }
16421
16422 if (N0.getOpcode() == ISD::FADD) {
16423 SDNode *CFP00 =
16424 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16425 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16426 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16427 (N0.getOperand(0) == N1)) {
16428 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16429 DAG.getConstantFP(3.0, DL, VT));
16430 }
16431 }
16432
16433 if (N1.getOpcode() == ISD::FADD) {
16434 SDNode *CFP10 =
16435 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16436 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16437 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16438 N1.getOperand(0) == N0) {
16439 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16440 DAG.getConstantFP(3.0, DL, VT));
16441 }
16442 }
16443
16444 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16445 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16446 N0.getOperand(0) == N0.getOperand(1) &&
16447 N1.getOperand(0) == N1.getOperand(1) &&
16448 N0.getOperand(0) == N1.getOperand(0)) {
16449 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16450 DAG.getConstantFP(4.0, DL, VT));
16451 }
16452 }
16453
16454 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16455 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16456 VT, N0, N1, Flags))
16457 return SD;
16458 } // enable-unsafe-fp-math
16459
16460 // FADD -> FMA combines:
16461 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16462 if (Fused.getOpcode() != ISD::DELETED_NODE)
16463 AddToWorklist(Fused.getNode());
16464 return Fused;
16465 }
16466 return SDValue();
16467}
16468
16469SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16470 SDValue Chain = N->getOperand(0);
16471 SDValue N0 = N->getOperand(1);
16472 SDValue N1 = N->getOperand(2);
16473 EVT VT = N->getValueType(0);
16474 EVT ChainVT = N->getValueType(1);
16475 SDLoc DL(N);
16476 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16477
16478 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16479 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16480 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16481 N1, DAG, LegalOperations, ForCodeSize)) {
16482 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16483 {Chain, N0, NegN1});
16484 }
16485
16486 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16487 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16488 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16489 N0, DAG, LegalOperations, ForCodeSize)) {
16490 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16491 {Chain, N1, NegN0});
16492 }
16493 return SDValue();
16494}
16495
16496SDValue DAGCombiner::visitFSUB(SDNode *N) {
16497 SDValue N0 = N->getOperand(0);
16498 SDValue N1 = N->getOperand(1);
16499 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16500 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16501 EVT VT = N->getValueType(0);
16502 SDLoc DL(N);
16503 const TargetOptions &Options = DAG.getTarget().Options;
16504 const SDNodeFlags Flags = N->getFlags();
16505 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16506
16507 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16508 return R;
16509
16510 // fold (fsub c1, c2) -> c1-c2
16511 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16512 return C;
16513
16514 // fold vector ops
16515 if (VT.isVector())
16516 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16517 return FoldedVOp;
16518
16519 if (SDValue NewSel = foldBinOpIntoSelect(N))
16520 return NewSel;
16521
16522 // (fsub A, 0) -> A
16523 if (N1CFP && N1CFP->isZero()) {
16524 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16525 Flags.hasNoSignedZeros()) {
16526 return N0;
16527 }
16528 }
16529
16530 if (N0 == N1) {
16531 // (fsub x, x) -> 0.0
16532 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16533 return DAG.getConstantFP(0.0f, DL, VT);
16534 }
16535
16536 // (fsub -0.0, N1) -> -N1
16537 if (N0CFP && N0CFP->isZero()) {
16538 if (N0CFP->isNegative() ||
16539 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16540 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16541 // flushed to zero, unless all users treat denorms as zero (DAZ).
16542 // FIXME: This transform will change the sign of a NaN and the behavior
16543 // of a signaling NaN. It is only valid when a NoNaN flag is present.
16544 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16545 if (DenormMode == DenormalMode::getIEEE()) {
16546 if (SDValue NegN1 =
16547 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16548 return NegN1;
16549 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16550 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16551 }
16552 }
16553 }
16554
16555 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16556 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16557 N1.getOpcode() == ISD::FADD) {
16558 // X - (X + Y) -> -Y
16559 if (N0 == N1->getOperand(0))
16560 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16561 // X - (Y + X) -> -Y
16562 if (N0 == N1->getOperand(1))
16563 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16564 }
16565
16566 // fold (fsub A, (fneg B)) -> (fadd A, B)
16567 if (SDValue NegN1 =
16568 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16569 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16570
16571 // FSUB -> FMA combines:
16572 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16573 AddToWorklist(Fused.getNode());
16574 return Fused;
16575 }
16576
16577 return SDValue();
16578}
16579
16580// Transform IEEE Floats:
16581// (fmul C, (uitofp Pow2))
16582// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16583// (fdiv C, (uitofp Pow2))
16584// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16585//
16586 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent, so
16587// there is no need for more than an add/sub.
16588//
16589// This is valid under the following circumstances:
16590// 1) We are dealing with IEEE floats
16591// 2) C is normal
16592// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16593// TODO: Much of this could also be used for generating `ldexp` on targets that
16594// prefer it.
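// Illustrative example (values assume IEEE binary32, where 23 mantissa bits
// are stored): C = 3.0f has bit pattern 0x40400000. For Pow2 = 8 we have
// Log2(Pow2) = 3, so the combine adds 3 << 23 = 0x01800000 to the bits,
// giving 0x41C00000, which is 24.0f == 3.0f * 8.0f. The fdiv case is the
// same with a subtraction instead of an addition.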
16595SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16596 EVT VT = N->getValueType(0);
16597 SDValue ConstOp, Pow2Op;
16598
16599 std::optional<int> Mantissa;
16600 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16601 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16602 return false;
16603
16604 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16605 Pow2Op = N->getOperand(1 - ConstOpIdx);
16606 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16607 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16608 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16609 return false;
16610
16611 Pow2Op = Pow2Op.getOperand(0);
16612
16613 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16614 // TODO: We could use knownbits to make this bound more precise.
16615 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16616
16617 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16618 if (CFP == nullptr)
16619 return false;
16620
16621 const APFloat &APF = CFP->getValueAPF();
16622
16623 // Make sure we have a normal/IEEE constant.
16624 if (!APF.isNormal() || !APF.isIEEE())
16625 return false;
16626
16627 // Make sure the float's exponent is within the bounds for which this
16628 // transform produces a bitwise-equal value.
16629 int CurExp = ilogb(APF);
16630 // FMul by pow2 will only increase exponent.
16631 int MinExp =
16632 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16633 // FDiv by pow2 will only decrease exponent.
16634 int MaxExp =
16635 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16636 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16637 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16638 return false;
16639
16640 // Finally make sure we actually know the mantissa for the float type.
16641 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16642 if (!Mantissa)
16643 Mantissa = ThisMantissa;
16644
16645 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16646 };
16647
16648 // TODO: We may be able to include undefs.
16649 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16650 };
16651
16652 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16653 return SDValue();
16654
16655 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16656 return SDValue();
16657
16658 // Get log2 after all other checks have taken place. This is because
16659 // BuildLogBase2 may create a new node.
16660 SDLoc DL(N);
16661 // Get Log2 type with same bitwidth as the float type (VT).
16662 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16663 if (VT.isVector())
16664 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16665 VT.getVectorElementCount());
16666
16667 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16668 /*InexpensiveOnly*/ true, NewIntVT);
16669 if (!Log2)
16670 return SDValue();
16671
16672 // Perform actual transform.
16673 SDValue MantissaShiftCnt =
16674 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16675 // TODO: Sometimes Log2 is of form `(X + C)`. `(X + C) << C1` should fold to
16676 // `(X << C1) + (C << C1)`, but that isn't always the case because of the
16677 // cast. We could implement that here by also handling the casts.
16678 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16679 SDValue ResAsInt =
16680 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16681 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16682 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16683 return ResAsFP;
16684}
16685
16686SDValue DAGCombiner::visitFMUL(SDNode *N) {
16687 SDValue N0 = N->getOperand(0);
16688 SDValue N1 = N->getOperand(1);
16689 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16690 EVT VT = N->getValueType(0);
16691 SDLoc DL(N);
16692 const TargetOptions &Options = DAG.getTarget().Options;
16693 const SDNodeFlags Flags = N->getFlags();
16694 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16695
16696 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16697 return R;
16698
16699 // fold (fmul c1, c2) -> c1*c2
16700 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16701 return C;
16702
16703 // canonicalize constant to RHS
16704 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16705 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16706 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16707
16708 // fold vector ops
16709 if (VT.isVector())
16710 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16711 return FoldedVOp;
16712
16713 if (SDValue NewSel = foldBinOpIntoSelect(N))
16714 return NewSel;
16715
16716 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16717 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16718 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16719 N0.getOpcode() == ISD::FMUL) {
16720 SDValue N00 = N0.getOperand(0);
16721 SDValue N01 = N0.getOperand(1);
16722 // Avoid an infinite loop by making sure that N00 is not a constant
16723 // (the inner multiply has not been constant folded yet).
16724 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16725 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16726 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16727 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16728 }
16729 }
16730
16731 // Match a special-case: we convert X * 2.0 into fadd.
16732 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16733 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16734 N0.getOperand(0) == N0.getOperand(1)) {
16735 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16736 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16737 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16738 }
16739
16740 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16741 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16742 VT, N0, N1, Flags))
16743 return SD;
16744 }
16745
16746 // fold (fmul X, 2.0) -> (fadd X, X)
16747 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16748 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16749
16750 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16751 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16752 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16753 return DAG.getNode(ISD::FSUB, DL, VT,
16754 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16755 }
16756 }
16757
16758 // -N0 * -N1 --> N0 * N1
16759 TargetLowering::NegatibleCost CostN0 =
16760 TargetLowering::NegatibleCost::Expensive;
16761 TargetLowering::NegatibleCost CostN1 =
16762 TargetLowering::NegatibleCost::Expensive;
16763 SDValue NegN0 =
16764 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16765 if (NegN0) {
16766 HandleSDNode NegN0Handle(NegN0);
16767 SDValue NegN1 =
16768 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16769 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16770 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16771 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16772 }
16773
16774 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16775 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16776 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16777 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16778 TLI.isOperationLegal(ISD::FABS, VT)) {
16779 SDValue Select = N0, X = N1;
16780 if (Select.getOpcode() != ISD::SELECT)
16781 std::swap(Select, X);
16782
16783 SDValue Cond = Select.getOperand(0);
16784 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16785 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16786
16787 if (TrueOpnd && FalseOpnd &&
16788 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16789 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16790 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16791 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16792 switch (CC) {
16793 default: break;
16794 case ISD::SETOLT:
16795 case ISD::SETULT:
16796 case ISD::SETOLE:
16797 case ISD::SETULE:
16798 case ISD::SETLT:
16799 case ISD::SETLE:
16800 std::swap(TrueOpnd, FalseOpnd);
16801 [[fallthrough]];
16802 case ISD::SETOGT:
16803 case ISD::SETUGT:
16804 case ISD::SETOGE:
16805 case ISD::SETUGE:
16806 case ISD::SETGT:
16807 case ISD::SETGE:
16808 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16809 TLI.isOperationLegal(ISD::FNEG, VT))
16810 return DAG.getNode(ISD::FNEG, DL, VT,
16811 DAG.getNode(ISD::FABS, DL, VT, X));
16812 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16813 return DAG.getNode(ISD::FABS, DL, VT, X);
16814
16815 break;
16816 }
16817 }
16818 }
16819
16820 // FMUL -> FMA combines:
16821 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16822 AddToWorklist(Fused.getNode());
16823 return Fused;
16824 }
16825
16826 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
16827 // able to run.
16828 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16829 return R;
16830
16831 return SDValue();
16832}
16833
16834template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16835 SDValue N0 = N->getOperand(0);
16836 SDValue N1 = N->getOperand(1);
16837 SDValue N2 = N->getOperand(2);
16838 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16839 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16840 EVT VT = N->getValueType(0);
16841 SDLoc DL(N);
16842 const TargetOptions &Options = DAG.getTarget().Options;
16843 // FMA nodes have flags that propagate to the created nodes.
16844 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16845 MatchContextClass matcher(DAG, TLI, N);
16846
16847 bool CanReassociate =
16848 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16849
16850 // Constant fold FMA.
16851 if (isa<ConstantFPSDNode>(N0) &&
16852 isa<ConstantFPSDNode>(N1) &&
16853 isa<ConstantFPSDNode>(N2)) {
16854 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16855 }
16856
16857 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16858 TargetLowering::NegatibleCost CostN0 =
16859 TargetLowering::NegatibleCost::Expensive;
16860 TargetLowering::NegatibleCost CostN1 =
16861 TargetLowering::NegatibleCost::Expensive;
16862 SDValue NegN0 =
16863 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16864 if (NegN0) {
16865 HandleSDNode NegN0Handle(NegN0);
16866 SDValue NegN1 =
16867 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16868 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16869 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16870 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16871 }
16872
16873 // FIXME: use fast math flags instead of Options.UnsafeFPMath
16874 if (Options.UnsafeFPMath) {
16875 if (N0CFP && N0CFP->isZero())
16876 return N2;
16877 if (N1CFP && N1CFP->isZero())
16878 return N2;
16879 }
16880
16881 // FIXME: Support splat of constant.
16882 if (N0CFP && N0CFP->isExactlyValue(1.0))
16883 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16884 if (N1CFP && N1CFP->isExactlyValue(1.0))
16885 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16886
16887 // Canonicalize (fma c, x, y) -> (fma x, c, y)
16888 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16889 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16890 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16891
16892 if (CanReassociate) {
16893 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16894 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16895 N2.hasOneUse() && DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16896 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16897 return matcher.getNode(
16898 ISD::FMUL, DL, VT, N0,
16899 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16900 }
16901
16902 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16903 if (matcher.match(N0, ISD::FMUL) &&
16904 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16905 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16906 return matcher.getNode(
16907 ISD::FMA, DL, VT, N0.getOperand(0),
16908 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16909 }
16910 }
16911
16912 // (fma x, -1, y) -> (fadd (fneg x), y)
16913 // FIXME: Support splat of constant.
16914 if (N1CFP) {
16915 if (N1CFP->isExactlyValue(1.0))
16916 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16917
16918 if (N1CFP->isExactlyValue(-1.0) &&
16919 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16920 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16921 AddToWorklist(RHSNeg.getNode());
16922 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16923 }
16924
16925 // fma (fneg x), K, y -> fma x, -K, y
16926 if (matcher.match(N0, ISD::FNEG) &&
16927 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16928 (N1.hasOneUse() &&
16929 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16930 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16931 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16932 }
16933 }
16934
16935 // FIXME: Support splat of constant.
16936 if (CanReassociate) {
16937 // (fma x, c, x) -> (fmul x, (c+1))
16938 if (N1CFP && N0 == N2) {
16939 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16940 matcher.getNode(ISD::FADD, DL, VT, N1,
16941 DAG.getConstantFP(1.0, DL, VT)));
16942 }
16943
16944 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16945 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16946 return matcher.getNode(ISD::FMUL, DL, VT, N0,
16947 matcher.getNode(ISD::FADD, DL, VT, N1,
16948 DAG.getConstantFP(-1.0, DL, VT)));
16949 }
16950 }
16951
16952 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16953 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16954 if (!TLI.isFNegFree(VT))
16955 if (SDValue Neg = TLI.getCheaperNegatedExpression(
16956 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
16957 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
16958 return SDValue();
16959}
16960
16961SDValue DAGCombiner::visitFMAD(SDNode *N) {
16962 SDValue N0 = N->getOperand(0);
16963 SDValue N1 = N->getOperand(1);
16964 SDValue N2 = N->getOperand(2);
16965 EVT VT = N->getValueType(0);
16966 SDLoc DL(N);
16967
16968 // Constant fold FMAD.
16969 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
16970 isa<ConstantFPSDNode>(N2))
16971 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
16972
16973 return SDValue();
16974}
16975
16976// Combine multiple FDIVs with the same divisor into multiple FMULs by the
16977// reciprocal.
16978// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
16979// Notice that this is not always beneficial. One reason is different targets
16980// may have different costs for FDIV and FMUL, so sometimes the cost of two
16981// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
16982// is that the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
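// For instance (shown as IR only for illustration; assumes the required
// reciprocal/unsafe-math flags are present):
//   %x = fdiv float %a, %d
//   %y = fdiv float %b, %d
// may be rewritten as:
//   %recip = fdiv float 1.0, %d
//   %x = fmul float %a, %recip
//   %y = fmul float %b, %recip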
16983SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
16984 // TODO: Limit this transform based on optsize/minsize - it always creates at
16985 // least 1 extra instruction. But the perf win may be substantial enough
16986 // that only minsize should restrict this.
16987 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
16988 const SDNodeFlags Flags = N->getFlags();
16989 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
16990 return SDValue();
16991
16992 // Skip if current node is a reciprocal/fneg-reciprocal.
16993 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
16994 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
16995 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
16996 return SDValue();
16997
16998 // Exit early if the target does not want this transform or if there can't
16999 // possibly be enough uses of the divisor to make the transform worthwhile.
17000 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17001
17002 // For splat vectors, scale the number of uses by the splat factor. If we can
17003 // convert the division into a scalar op, that will likely be much faster.
17004 unsigned NumElts = 1;
17005 EVT VT = N->getValueType(0);
17006 if (VT.isVector() && DAG.isSplatValue(N1))
17007 NumElts = VT.getVectorMinNumElements();
17008
17009 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17010 return SDValue();
17011
17012 // Find all FDIV users of the same divisor.
17013 // Use a set because duplicates may be present in the user list.
17014 SetVector<SDNode *> Users;
17015 for (auto *U : N1->uses()) {
17016 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17017 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17018 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17019 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17020 U->getFlags().hasAllowReassociation() &&
17021 U->getFlags().hasNoSignedZeros())
17022 continue;
17023
17024 // This division is eligible for optimization only if global unsafe math
17025 // is enabled or if this division allows reciprocal formation.
17026 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17027 Users.insert(U);
17028 }
17029 }
17030
17031 // Now that we have the actual number of divisor uses, make sure it meets
17032 // the minimum threshold specified by the target.
17033 if ((Users.size() * NumElts) < MinUses)
17034 return SDValue();
17035
17036 SDLoc DL(N);
17037 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17038 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17039
17040 // Dividend / Divisor -> Dividend * Reciprocal
17041 for (auto *U : Users) {
17042 SDValue Dividend = U->getOperand(0);
17043 if (Dividend != FPOne) {
17044 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17045 Reciprocal, Flags);
17046 CombineTo(U, NewNode);
17047 } else if (U != Reciprocal.getNode()) {
17048 // In the absence of fast-math-flags, this user node is always the
17049 // same node as Reciprocal, but with FMF they may be different nodes.
17050 CombineTo(U, Reciprocal);
17051 }
17052 }
17053 return SDValue(N, 0); // N was replaced.
17054}
17055
17056SDValue DAGCombiner::visitFDIV(SDNode *N) {
17057 SDValue N0 = N->getOperand(0);
17058 SDValue N1 = N->getOperand(1);
17059 EVT VT = N->getValueType(0);
17060 SDLoc DL(N);
17061 const TargetOptions &Options = DAG.getTarget().Options;
17062 SDNodeFlags Flags = N->getFlags();
17063 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17064
17065 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17066 return R;
17067
17068 // fold (fdiv c1, c2) -> c1/c2
17069 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17070 return C;
17071
17072 // fold vector ops
17073 if (VT.isVector())
17074 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17075 return FoldedVOp;
17076
17077 if (SDValue NewSel = foldBinOpIntoSelect(N))
17078 return NewSel;
17079
17080 if (SDValue V = combineRepeatedFPDivisors(N))
17081 return V;
17082
17083 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17084 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
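    // For example, (fdiv X, 4.0) can become (fmul X, 0.25) since 0.25 is
    // exactly representable, while (fdiv X, 3.0) would use the inexact
    // constant 0x3EAAAAAB (~0.3333), which is acceptable only because
    // reciprocal/unsafe math was requested.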
17085 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17086 // Compute the reciprocal 1.0 / c2.
17087 const APFloat &N1APF = N1CFP->getValueAPF();
17088 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17089 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17090 // Only do the transform if the reciprocal is a legal fp immediate that
17091 // isn't too nasty (eg NaN, denormal, ...).
17092 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17093 (!LegalOperations ||
17094 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17095 // backend)... we should handle this gracefully after Legalize.
17096 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17097 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17098 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17099 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17100 DAG.getConstantFP(Recip, DL, VT));
17101 }
17102
17103 // If this FDIV is part of a reciprocal square root, it may be folded
17104 // into a target-specific square root estimate instruction.
17105 if (N1.getOpcode() == ISD::FSQRT) {
17106 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17107 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17108 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17109 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17110 if (SDValue RV =
17111 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17112 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17113 AddToWorklist(RV.getNode());
17114 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17115 }
17116 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17117 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17118 if (SDValue RV =
17119 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17120 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17121 AddToWorklist(RV.getNode());
17122 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17123 }
17124 } else if (N1.getOpcode() == ISD::FMUL) {
17125 // Look through an FMUL. Even though this won't remove the FDIV directly,
17126 // it's still worthwhile to get rid of the FSQRT if possible.
17127 SDValue Sqrt, Y;
17128 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17129 Sqrt = N1.getOperand(0);
17130 Y = N1.getOperand(1);
17131 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17132 Sqrt = N1.getOperand(1);
17133 Y = N1.getOperand(0);
17134 }
17135 if (Sqrt.getNode()) {
17136 // If the other multiply operand is known positive, pull it into the
17137 // sqrt. That will eliminate the division if we convert to an estimate.
17138 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17139 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17140 SDValue A;
17141 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17142 A = Y.getOperand(0);
17143 else if (Y == Sqrt.getOperand(0))
17144 A = Y;
17145 if (A) {
17146 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17147 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17148 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17149 SDValue AAZ =
17150 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17151 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17152 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17153
17154 // Estimate creation failed. Clean up speculatively created nodes.
17155 recursivelyDeleteUnusedNodes(AAZ.getNode());
17156 }
17157 }
17158
17159 // We found a FSQRT, so try to make this fold:
17160 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17161 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17162 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17163 AddToWorklist(Div.getNode());
17164 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17165 }
17166 }
17167 }
17168
17169 // Fold into a reciprocal estimate and multiply instead of a real divide.
17170 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17171 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17172 return RV;
17173 }
17174
17175 // Fold X/Sqrt(X) -> Sqrt(X)
17176 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17177 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17178 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17179 return N1;
17180
17181 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17182 TargetLowering::NegatibleCost CostN0 =
17183 TargetLowering::NegatibleCost::Expensive;
17184 TargetLowering::NegatibleCost CostN1 =
17185 TargetLowering::NegatibleCost::Expensive;
17186 SDValue NegN0 =
17187 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17188 if (NegN0) {
17189 HandleSDNode NegN0Handle(NegN0);
17190 SDValue NegN1 =
17191 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17192 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17193 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17194 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17195 }
17196
17197 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17198 return R;
17199
17200 return SDValue();
17201}
17202
17203SDValue DAGCombiner::visitFREM(SDNode *N) {
17204 SDValue N0 = N->getOperand(0);
17205 SDValue N1 = N->getOperand(1);
17206 EVT VT = N->getValueType(0);
17207 SDNodeFlags Flags = N->getFlags();
17208 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17209
17210 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17211 return R;
17212
17213 // fold (frem c1, c2) -> fmod(c1,c2)
17214 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17215 return C;
17216
17217 if (SDValue NewSel = foldBinOpIntoSelect(N))
17218 return NewSel;
17219
17220 return SDValue();
17221}
17222
17223SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17224 SDNodeFlags Flags = N->getFlags();
17225 const TargetOptions &Options = DAG.getTarget().Options;
17226
17227 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17228 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17229 if (!Flags.hasApproximateFuncs() ||
17230 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17231 return SDValue();
17232
17233 SDValue N0 = N->getOperand(0);
17234 if (TLI.isFsqrtCheap(N0, DAG))
17235 return SDValue();
17236
17237 // FSQRT nodes have flags that propagate to the created nodes.
17238 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17239 // transform the fdiv, we may produce a sub-optimal estimate sequence
17240 // because the reciprocal calculation may not have to filter out a
17241 // 0.0 input.
17242 return buildSqrtEstimate(N0, Flags);
17243}
17244
17245/// copysign(x, fp_extend(y)) -> copysign(x, y)
17246/// copysign(x, fp_round(y)) -> copysign(x, y)
17247/// Operands to the functions are the types of X and Y, respectively.
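/// For example, (fcopysign f64:X, (fp_extend f32:Y)) only needs the sign bit
/// of Y, which the extension preserves, so it can use Y directly.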
17248static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17249 // Always fold no-op FP casts.
17250 if (XTy == YTy)
17251 return true;
17252
17253 // Do not optimize out type conversion of f128 type yet.
17254 // For some targets like x86_64, configuration is changed to keep one f128
17255 // value in one SSE register, but instruction selection cannot handle
17256 // FCOPYSIGN on SSE registers yet.
17257 if (YTy == MVT::f128)
17258 return false;
17259
17260 return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17261}
17262
17263static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17264 SDValue N1 = N->getOperand(1);
17265 if (N1.getOpcode() != ISD::FP_EXTEND &&
17266 N1.getOpcode() != ISD::FP_ROUND)
17267 return false;
17268 EVT N1VT = N1->getValueType(0);
17269 EVT N1Op0VT = N1->getOperand(0).getValueType();
17270 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17271}
17272
17273SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17274 SDValue N0 = N->getOperand(0);
17275 SDValue N1 = N->getOperand(1);
17276 EVT VT = N->getValueType(0);
17277
17278 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17279 if (SDValue C =
17280 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17281 return C;
17282
17283 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17284 const APFloat &V = N1C->getValueAPF();
17285 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17286 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17287 if (!V.isNegative()) {
17288 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17289 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17290 } else {
17291 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17292 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17293 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17294 }
17295 }
17296
17297 // copysign(fabs(x), y) -> copysign(x, y)
17298 // copysign(fneg(x), y) -> copysign(x, y)
17299 // copysign(copysign(x,z), y) -> copysign(x, y)
17300 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17301 N0.getOpcode() == ISD::FCOPYSIGN)
17302 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17303
17304 // copysign(x, abs(y)) -> abs(x)
17305 if (N1.getOpcode() == ISD::FABS)
17306 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17307
17308 // copysign(x, copysign(y,z)) -> copysign(x, z)
17309 if (N1.getOpcode() == ISD::FCOPYSIGN)
17310 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17311
17312 // copysign(x, fp_extend(y)) -> copysign(x, y)
17313 // copysign(x, fp_round(y)) -> copysign(x, y)
17314 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17315 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17316
17317 return SDValue();
17318}
17319
17320SDValue DAGCombiner::visitFPOW(SDNode *N) {
17321 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17322 if (!ExponentC)
17323 return SDValue();
17324 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17325
17326 // Try to convert x ** (1/3) into cube root.
17327 // TODO: Handle the various flavors of long double.
17328 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17329 // Some range near 1/3 should be fine.
17330 EVT VT = N->getValueType(0);
17331 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17332 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17333 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17334 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17335 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
17336 // For regular numbers, rounding may cause the results to differ.
17337 // Therefore, we require { nsz ninf nnan afn } for this transform.
17338 // TODO: We could select out the special cases if we don't have nsz/ninf.
17339 SDNodeFlags Flags = N->getFlags();
17340 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17341 !Flags.hasApproximateFuncs())
17342 return SDValue();
17343
17344 // Do not create a cbrt() libcall if the target does not have it, and do not
17345 // turn a pow that has lowering support into a cbrt() libcall.
17346 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17347 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17348 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17349 return SDValue();
17350
17351 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17352 }
17353
17354 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17355 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17356 // TODO: This could be extended (using a target hook) to handle smaller
17357 // power-of-2 fractional exponents.
17358 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17359 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17360 if (ExponentIs025 || ExponentIs075) {
17361 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17362 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17363 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17364 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17365 // For regular numbers, rounding may cause the results to differ.
17366 // Therefore, we require { nsz ninf afn } for this transform.
17367 // TODO: We could select out the special cases if we don't have nsz/ninf.
17368 SDNodeFlags Flags = N->getFlags();
17369
17370 // We only need no signed zeros for the 0.25 case.
17371 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17372 !Flags.hasApproximateFuncs())
17373 return SDValue();
17374
17375 // Don't double the number of libcalls. We are trying to inline fast code.
17376 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17377 return SDValue();
17378
17379 // Assume that libcalls are the smallest code.
17380 // TODO: This restriction should probably be lifted for vectors.
17381 if (ForCodeSize)
17382 return SDValue();
17383
17384 // pow(X, 0.25) --> sqrt(sqrt(X))
17385 SDLoc DL(N);
17386 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17387 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17388 if (ExponentIs025)
17389 return SqrtSqrt;
17390 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17391 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17392 }
17393
17394 return SDValue();
17395}
17396
17397static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17398 const TargetLowering &TLI) {
17399 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17400 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17401 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17402 // conversions would return +0.0.
17403 // FIXME: We should be able to use node-level FMF here.
17404 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17405 EVT VT = N->getValueType(0);
17406 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17407 !DAG.getTarget().Options.NoSignedZerosFPMath)
17408 return SDValue();
17409
17410 // fptosi/fptoui round towards zero, so converting from FP to integer and
17411 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
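  // For example, (sitofp (fptosi 2.75)) == 2.0 == (ftrunc 2.75), and likewise
  // (sitofp (fptosi -2.75)) == -2.0 == (ftrunc -2.75); only inputs in
  // (-1.0, -0.0) differ (round trip gives +0.0, ftrunc gives -0.0), hence the
  // NoSignedZerosFPMath requirement above.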
17412 SDValue N0 = N->getOperand(0);
17413 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17414 N0.getOperand(0).getValueType() == VT)
17415 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17416
17417 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17418 N0.getOperand(0).getValueType() == VT)
17419 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17420
17421 return SDValue();
17422}
17423
17424SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17425 SDValue N0 = N->getOperand(0);
17426 EVT VT = N->getValueType(0);
17427 EVT OpVT = N0.getValueType();
17428
17429 // [us]itofp(undef) = 0, because the result value is bounded.
17430 if (N0.isUndef())
17431 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17432
17433 // fold (sint_to_fp c1) -> c1fp
17434 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17435 // ...but only if the target supports immediate floating-point values
17436 (!LegalOperations ||
17437 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17438 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17439
17440 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17441 // but UINT_TO_FP is legal on this target, try to convert.
17442 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17443 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17444 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17445 if (DAG.SignBitIsZero(N0))
17446 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17447 }
17448
17449 // The next optimizations are desirable only if SELECT_CC can be lowered.
17450 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17451 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17452 !VT.isVector() &&
17453 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17454 SDLoc DL(N);
17455 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17456 DAG.getConstantFP(0.0, DL, VT));
17457 }
17458
17459 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17460 // (select (setcc x, y, cc), 1.0, 0.0)
17461 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17462 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17463 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17464 SDLoc DL(N);
17465 return DAG.getSelect(DL, VT, N0.getOperand(0),
17466 DAG.getConstantFP(1.0, DL, VT),
17467 DAG.getConstantFP(0.0, DL, VT));
17468 }
17469
17470 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17471 return FTrunc;
17472
17473 return SDValue();
17474}
17475
17476SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17477 SDValue N0 = N->getOperand(0);
17478 EVT VT = N->getValueType(0);
17479 EVT OpVT = N0.getValueType();
17480
17481 // [us]itofp(undef) = 0, because the result value is bounded.
17482 if (N0.isUndef())
17483 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17484
17485 // fold (uint_to_fp c1) -> c1fp
17486 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17487 // ...but only if the target supports immediate floating-point values
17488 (!LegalOperations ||
17489 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17490 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17491
17492 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17493 // but SINT_TO_FP is legal on this target, try to convert.
17494 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17495 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17496 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17497 if (DAG.SignBitIsZero(N0))
17498 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17499 }
17500
17501 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17502 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17503 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17504 SDLoc DL(N);
17505 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17506 DAG.getConstantFP(0.0, DL, VT));
17507 }
17508
17509 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17510 return FTrunc;
17511
17512 return SDValue();
17513}
17514
17515// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17516static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17517 SDValue N0 = N->getOperand(0);
17518 EVT VT = N->getValueType(0);
17519
17520 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17521 return SDValue();
17522
17523 SDValue Src = N0.getOperand(0);
17524 EVT SrcVT = Src.getValueType();
17525 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17526 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17527
17528 // We can safely assume the conversion won't overflow the output range,
17529 // because (for example) (uint8_t)18293.f is undefined behavior.
17530
17531 // Since we can assume the conversion won't overflow, our decision as to
17532 // whether the input will fit in the float should depend on the minimum
17533 // of the input range and output range.
17534
17535 // This means this is also safe for a signed input and unsigned output, since
17536 // a negative input would lead to undefined behavior.
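  // Worked example (assuming f32, which has 24 bits of precision): for
  // (fp_to_sint (sint_to_fp i16:x)) producing i32, InputSize = 16 - 1 = 15
  // and OutputSize = 32, so ActualSize = 15 <= 24 and the round trip folds to
  // a sign_extend of x; with an i64 source the fold would be rejected.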
17537 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17538 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17539 unsigned ActualSize = std::min(InputSize, OutputSize);
17540 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17541
17542 // We can only fold away the float conversion if the input range can be
17543 // represented exactly in the float range.
17544 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17545 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17546 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17547 : ISD::ZERO_EXTEND;
17548 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17549 }
17550 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17551 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17552 return DAG.getBitcast(VT, Src);
17553 }
17554 return SDValue();
17555}
17556
17557SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17558 SDValue N0 = N->getOperand(0);
17559 EVT VT = N->getValueType(0);
17560
17561 // fold (fp_to_sint undef) -> undef
17562 if (N0.isUndef())
17563 return DAG.getUNDEF(VT);
17564
17565 // fold (fp_to_sint c1fp) -> c1
17566 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17567 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17568
17569 return FoldIntToFPToInt(N, DAG);
17570}
17571
17572SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17573 SDValue N0 = N->getOperand(0);
17574 EVT VT = N->getValueType(0);
17575
17576 // fold (fp_to_uint undef) -> undef
17577 if (N0.isUndef())
17578 return DAG.getUNDEF(VT);
17579
17580 // fold (fp_to_uint c1fp) -> c1
17581 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17582 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17583
17584 return FoldIntToFPToInt(N, DAG);
17585}
17586
17587SDValue DAGCombiner::visitXRINT(SDNode *N) {
17588 SDValue N0 = N->getOperand(0);
17589 EVT VT = N->getValueType(0);
17590
17591 // fold (lrint|llrint undef) -> undef
17592 if (N0.isUndef())
17593 return DAG.getUNDEF(VT);
17594
17595 // fold (lrint|llrint c1fp) -> c1
17596 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17597 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17598
17599 return SDValue();
17600}
17601
17602SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17603 SDValue N0 = N->getOperand(0);
17604 SDValue N1 = N->getOperand(1);
17605 EVT VT = N->getValueType(0);
17606
17607 // fold (fp_round c1fp) -> c1fp
17608 if (SDValue C =
17609 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17610 return C;
17611
17612 // fold (fp_round (fp_extend x)) -> x
17613 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17614 return N0.getOperand(0);
17615
17616 // fold (fp_round (fp_round x)) -> (fp_round x)
17617 if (N0.getOpcode() == ISD::FP_ROUND) {
17618 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17619 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17620
17621 // Avoid folding legal fp_rounds into non-legal ones.
17622 if (!hasOperation(ISD::FP_ROUND, VT))
17623 return SDValue();
17624
17625 // Skip this folding if it results in an fp_round from f80 to f16.
17626 //
17627 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17628 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17629 // instructions from f32 or f64. Moreover, the first (value-preserving)
17630 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17631 // x86.
17632 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17633 return SDValue();
17634
17635 // If the first fp_round isn't a value preserving truncation, it might
17636 // introduce a tie in the second fp_round, that wouldn't occur in the
17637 // single-step fp_round we want to fold to.
17638 // In other words, double rounding isn't the same as rounding.
17639 // Also, this is a value preserving truncation iff both fp_round's are.
17640 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17641 SDLoc DL(N);
17642 return DAG.getNode(
17643 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17644 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17645 }
17646 }
17647
17648 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17649 // Note: From a legality perspective, this is a two step transform. First,
17650 // we duplicate the fp_round to the arguments of the copysign, then we
17651 // eliminate the fp_round on Y. The second step requires an additional
17652 // predicate to match the implementation above.
17653 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17654 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17655 N0.getValueType())) {
17656 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17657 N0.getOperand(0), N1);
17658 AddToWorklist(Tmp.getNode());
17659 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17660 Tmp, N0.getOperand(1));
17661 }
17662
17663 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17664 return NewVSel;
17665
17666 return SDValue();
17667}
17668
17669SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17670 SDValue N0 = N->getOperand(0);
17671 EVT VT = N->getValueType(0);
17672
17673 if (VT.isVector())
17674 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17675 return FoldedVOp;
17676
17677 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17678 if (N->hasOneUse() &&
17679 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17680 return SDValue();
17681
17682 // fold (fp_extend c1fp) -> c1fp
17683 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17684 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17685
17686 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17687 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17688 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17689 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17690
17691 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17692 // value of X.
17693 if (N0.getOpcode() == ISD::FP_ROUND
17694 && N0.getConstantOperandVal(1) == 1) {
17695 SDValue In = N0.getOperand(0);
17696 if (In.getValueType() == VT) return In;
17697 if (VT.bitsLT(In.getValueType()))
17698 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17699 In, N0.getOperand(1));
17700 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17701 }
17702
17703 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17704 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17705 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17706 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17707 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17708 LN0->getChain(),
17709 LN0->getBasePtr(), N0.getValueType(),
17710 LN0->getMemOperand());
17711 CombineTo(N, ExtLoad);
17712 CombineTo(
17713 N0.getNode(),
17714 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17715 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17716 ExtLoad.getValue(1));
17717 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17718 }
17719
17720 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17721 return NewVSel;
17722
17723 return SDValue();
17724}
17725
17726SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17727 SDValue N0 = N->getOperand(0);
17728 EVT VT = N->getValueType(0);
17729
17730 // fold (fceil c1) -> fceil(c1)
17731 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17732 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17733
17734 return SDValue();
17735}
17736
17737SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17738 SDValue N0 = N->getOperand(0);
17739 EVT VT = N->getValueType(0);
17740
17741 // fold (ftrunc c1) -> ftrunc(c1)
17742 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17743 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17744
17745 // fold ftrunc (known rounded int x) -> x
17746 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17747 // likely to be generated to extract integer from a rounded floating value.
17748 switch (N0.getOpcode()) {
17749 default: break;
17750 case ISD::FRINT:
17751 case ISD::FTRUNC:
17752 case ISD::FNEARBYINT:
17753 case ISD::FROUNDEVEN:
17754 case ISD::FFLOOR:
17755 case ISD::FCEIL:
17756 return N0;
17757 }
17758
17759 return SDValue();
17760}
17761
17762SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17763 SDValue N0 = N->getOperand(0);
17764
17765 // fold (ffrexp c1) -> ffrexp(c1)
17766 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17767 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17768 return SDValue();
17769}
17770
17771SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17772 SDValue N0 = N->getOperand(0);
17773 EVT VT = N->getValueType(0);
17774
17775 // fold (ffloor c1) -> ffloor(c1)
17776 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17777 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17778
17779 return SDValue();
17780}
17781
17782SDValue DAGCombiner::visitFNEG(SDNode *N) {
17783 SDValue N0 = N->getOperand(0);
17784 EVT VT = N->getValueType(0);
17785 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17786
17787 // Constant fold FNEG.
17788 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17789 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17790
17791 if (SDValue NegN0 =
17792 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17793 return NegN0;
17794
17795 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17796 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17797 // know it was called from a context with a nsz flag if the input fsub does
17798 // not.
17799 if (N0.getOpcode() == ISD::FSUB &&
17800 (DAG.getTarget().Options.NoSignedZerosFPMath ||
17801 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17802 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17803 N0.getOperand(0));
17804 }
17805
17806 if (SDValue Cast = foldSignChangeInBitcast(N))
17807 return Cast;
17808
17809 return SDValue();
17810}
17811
17812SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17813 SDValue N0 = N->getOperand(0);
17814 SDValue N1 = N->getOperand(1);
17815 EVT VT = N->getValueType(0);
17816 const SDNodeFlags Flags = N->getFlags();
17817 unsigned Opc = N->getOpcode();
17818 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17819 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17820 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17821
17822 // Constant fold.
17823 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17824 return C;
17825
17826 // Canonicalize to constant on RHS.
17827 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17828 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17829 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17830
17831 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17832 const APFloat &AF = N1CFP->getValueAPF();
17833
17834 // minnum(X, nan) -> X
17835 // maxnum(X, nan) -> X
17836 // minimum(X, nan) -> nan
17837 // maximum(X, nan) -> nan
17838 if (AF.isNaN())
17839 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17840
17841 // In the following folds, inf can be replaced with the largest finite
17842 // float, if the ninf flag is set.
17843 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17844 // minnum(X, -inf) -> -inf
17845 // maxnum(X, +inf) -> +inf
17846 // minimum(X, -inf) -> -inf if nnan
17847 // maximum(X, +inf) -> +inf if nnan
17848 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17849 return N->getOperand(1);
17850
17851 // minnum(X, +inf) -> X if nnan
17852 // maxnum(X, -inf) -> X if nnan
17853 // minimum(X, +inf) -> X
17854 // maximum(X, -inf) -> X
17855 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17856 return N->getOperand(0);
17857 }
17858 }
17859
17860 if (SDValue SD = reassociateReduction(
17861 PropagatesNaN
17862 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17863 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17864 Opc, SDLoc(N), VT, N0, N1, Flags))
17865 return SD;
17866
17867 return SDValue();
17868}
17869
17870SDValue DAGCombiner::visitFABS(SDNode *N) {
17871 SDValue N0 = N->getOperand(0);
17872 EVT VT = N->getValueType(0);
17873
17874 // fold (fabs c1) -> fabs(c1)
17875 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17876 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17877
17878 // fold (fabs (fabs x)) -> (fabs x)
17879 if (N0.getOpcode() == ISD::FABS)
17880 return N->getOperand(0);
17881
17882 // fold (fabs (fneg x)) -> (fabs x)
17883 // fold (fabs (fcopysign x, y)) -> (fabs x)
17884 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17885 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17886
17887 if (SDValue Cast = foldSignChangeInBitcast(N))
17888 return Cast;
17889
17890 return SDValue();
17891}
17892
17893SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17894 SDValue Chain = N->getOperand(0);
17895 SDValue N1 = N->getOperand(1);
17896 SDValue N2 = N->getOperand(2);
17897
17898 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17899 // nondeterministic jumps).
17900 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17901 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17902 N1->getOperand(0), N2);
17903 }
17904
17905 // Variant of the previous fold where there is a SETCC in between:
17906 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
17907 // =>
17908 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17909 // =>
17910 // BRCOND(SETCC(X, CONST, Cond))
17911 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17912 // isn't equivalent to true or false.
17913 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17914 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17915 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17916 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17917 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17918 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17919 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17920 bool Updated = false;
17921
17922 // Is 'X Cond C' always true or false?
17923 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17924 bool False = (Cond == ISD::SETULT && C->isZero()) ||
17925 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17926 (Cond == ISD::SETUGT && C->isAllOnes()) ||
17927 (Cond == ISD::SETGT && C->isMaxSignedValue());
17928 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17929 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17930 (Cond == ISD::SETUGE && C->isZero()) ||
17931 (Cond == ISD::SETGE && C->isMinSignedValue());
17932 return True || False;
17933 };
17934
17935 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17936 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17937 S0 = S0->getOperand(0);
17938 Updated = true;
17939 }
17940 }
17941 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17942 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17943 S1 = S1->getOperand(0);
17944 Updated = true;
17945 }
17946 }
17947
17948 if (Updated)
17949 return DAG.getNode(
17950 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17951 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17952 }
17953
17954 // If N is a constant we could fold this into a fallthrough or unconditional
17955 // branch. However that doesn't happen very often in normal code, because
17956 // Instcombine/SimplifyCFG should have handled the available opportunities.
17957 // If we did this folding here, it would be necessary to update the
17958 // MachineBasicBlock CFG, which is awkward.
17959
17960 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
17961 // on the target.
17962 if (N1.getOpcode() == ISD::SETCC &&
17963 TLI.isOperationLegalOrCustom(ISD::BR_CC,
17964 N1.getOperand(0).getValueType())) {
17965 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
17966 Chain, N1.getOperand(2),
17967 N1.getOperand(0), N1.getOperand(1), N2);
17968 }
17969
17970 if (N1.hasOneUse()) {
17971 // rebuildSetCC calls visitXor which may change the Chain when there is a
17972 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
17973 HandleSDNode ChainHandle(Chain);
17974 if (SDValue NewN1 = rebuildSetCC(N1))
17975 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
17976 ChainHandle.getValue(), NewN1, N2);
17977 }
17978
17979 return SDValue();
17980}
17981
17982SDValue DAGCombiner::rebuildSetCC(SDValue N) {
17983 if (N.getOpcode() == ISD::SRL ||
17984 (N.getOpcode() == ISD::TRUNCATE &&
17985 (N.getOperand(0).hasOneUse() &&
17986 N.getOperand(0).getOpcode() == ISD::SRL))) {
17987 // Look past the truncate.
17988 if (N.getOpcode() == ISD::TRUNCATE)
17989 N = N.getOperand(0);
17990
17991 // Match this pattern so that we can generate simpler code:
17992 //
17993 // %a = ...
17994 // %b = and i32 %a, 2
17995 // %c = srl i32 %b, 1
17996 // brcond i32 %c ...
17997 //
17998 // into
17999 //
18000 // %a = ...
18001 // %b = and i32 %a, 2
18002 // %c = setcc eq %b, 0
18003 // brcond %c ...
18004 //
18005 // This applies only when the AND constant value has one bit set and the
18006 // SRL constant is equal to the log2 of the AND constant. The back-end is
18007 // smart enough to convert the result into a TEST/JMP sequence.
18008 SDValue Op0 = N.getOperand(0);
18009 SDValue Op1 = N.getOperand(1);
18010
18011 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18012 SDValue AndOp1 = Op0.getOperand(1);
18013
18014 if (AndOp1.getOpcode() == ISD::Constant) {
18015 const APInt &AndConst = AndOp1->getAsAPIntVal();
18016
18017 if (AndConst.isPowerOf2() &&
18018 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18019 SDLoc DL(N);
18020 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18021 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18022 ISD::SETNE);
18023 }
18024 }
18025 }
18026 }
18027
18028 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18029 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18030 if (N.getOpcode() == ISD::XOR) {
18031 // Because we may call this on a speculatively constructed
18032 // SimplifiedSetCC Node, we need to simplify this node first.
18033 // Ideally this should be folded into SimplifySetCC and not
18034 // here. For now, grab a handle to N so we don't lose it from
18035 // replacements internal to the visit.
18036 HandleSDNode XORHandle(N);
18037 while (N.getOpcode() == ISD::XOR) {
18038 SDValue Tmp = visitXOR(N.getNode());
18039 // No simplification done.
18040 if (!Tmp.getNode())
18041 break;
18042 // Returning N is a form of in-visit replacement that may invalidate
18043 // N. Grab the value from the Handle.
18044 if (Tmp.getNode() == N.getNode())
18045 N = XORHandle.getValue();
18046 else // Node simplified. Try simplifying again.
18047 N = Tmp;
18048 }
18049
18050 if (N.getOpcode() != ISD::XOR)
18051 return N;
18052
18053 SDValue Op0 = N->getOperand(0);
18054 SDValue Op1 = N->getOperand(1);
18055
18056 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18057 bool Equal = false;
18058 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18059 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18060 Op0.getValueType() == MVT::i1) {
18061 N = Op0;
18062 Op0 = N->getOperand(0);
18063 Op1 = N->getOperand(1);
18064 Equal = true;
18065 }
18066
18067 EVT SetCCVT = N.getValueType();
18068 if (LegalTypes)
18069 SetCCVT = getSetCCResultType(SetCCVT);
18070 // Replace the uses of XOR with SETCC
18071 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18072 Equal ? ISD::SETEQ : ISD::SETNE);
18073 }
18074 }
18075
18076 return SDValue();
18077}
18078
18079// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18080//
18081SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18082 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18083 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18084
18085 // If N is a constant we could fold this into a fallthrough or unconditional
18086 // branch. However that doesn't happen very often in normal code, because
18087 // Instcombine/SimplifyCFG should have handled the available opportunities.
18088 // If we did this folding here, it would be necessary to update the
18089 // MachineBasicBlock CFG, which is awkward.
18090
18091 // Use SimplifySetCC to simplify SETCC's.
18092 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18093 CondLHS, CondRHS, CC->get(), SDLoc(N),
18094 false);
18095 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18096
18097 // fold to a simpler setcc
18098 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18099 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18100 N->getOperand(0), Simp.getOperand(2),
18101 Simp.getOperand(0), Simp.getOperand(1),
18102 N->getOperand(4));
18103
18104 return SDValue();
18105}
18106
18107static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18108 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18109 const TargetLowering &TLI) {
18110 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18111 if (LD->isIndexed())
18112 return false;
18113 EVT VT = LD->getMemoryVT();
18114 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18115 return false;
18116 Ptr = LD->getBasePtr();
18117 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18118 if (ST->isIndexed())
18119 return false;
18120 EVT VT = ST->getMemoryVT();
18121 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18122 return false;
18123 Ptr = ST->getBasePtr();
18124 IsLoad = false;
18125 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18126 if (LD->isIndexed())
18127 return false;
18128 EVT VT = LD->getMemoryVT();
18129 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18130 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18131 return false;
18132 Ptr = LD->getBasePtr();
18133 IsMasked = true;
18134 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18135 if (ST->isIndexed())
18136 return false;
18137 EVT VT = ST->getMemoryVT();
18138 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18139 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18140 return false;
18141 Ptr = ST->getBasePtr();
18142 IsLoad = false;
18143 IsMasked = true;
18144 } else {
18145 return false;
18146 }
18147 return true;
18148}
18149
18150/// Try turning a load/store into a pre-indexed load/store when the base
18151/// pointer is an add or subtract and it has other uses besides the load/store.
18152/// After the transformation, the new indexed load/store has effectively folded
18153/// the add/subtract in and all of its other uses are redirected to the
18154/// new load/store.
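/// For illustration (a rough sketch in pseudo-notation, assuming a target with
/// pre-indexed addressing):
///   t = add x, 8
///   v = load t
///   ... other uses of t ...
/// becomes
///   v, t' = load.pre x, 8     ; the load also produces t' = x + 8
/// with the other uses of t redirected to t'.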
18155bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18156 if (Level < AfterLegalizeDAG)
18157 return false;
18158
18159 bool IsLoad = true;
18160 bool IsMasked = false;
18161 SDValue Ptr;
18162 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18163 Ptr, TLI))
18164 return false;
18165
18166 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18167 // out. There is no reason to make this a preinc/predec.
18168 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18169 Ptr->hasOneUse())
18170 return false;
18171
18172 // Ask the target to do addressing mode selection.
18173   SDValue BasePtr;
18174   SDValue Offset;
18175   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18176   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18177 return false;
18178
18179 // Backends without true r+i pre-indexed forms may need to pass a
18180 // constant base with a variable offset so that constant coercion
18181 // will work with the patterns in canonical form.
18182 bool Swapped = false;
18183 if (isa<ConstantSDNode>(BasePtr)) {
18184 std::swap(BasePtr, Offset);
18185 Swapped = true;
18186 }
18187
18188   // Don't create an indexed load / store with zero offset.
18189   if (isNullConstant(Offset))
18190     return false;
18191
18192 // Try turning it into a pre-indexed load / store except when:
18193 // 1) The new base ptr is a frame index.
18194 // 2) If N is a store and the new base ptr is either the same as or is a
18195 // predecessor of the value being stored.
18196 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18197 // that would create a cycle.
18198 // 4) All uses are load / store ops that use it as old base ptr.
18199
18200 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18201 // (plus the implicit offset) to a register to preinc anyway.
18202 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18203 return false;
18204
18205 // Check #2.
18206 if (!IsLoad) {
18207 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18208 : cast<StoreSDNode>(N)->getValue();
18209
18210 // Would require a copy.
18211 if (Val == BasePtr)
18212 return false;
18213
18214 // Would create a cycle.
18215 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18216 return false;
18217 }
18218
18219 // Caches for hasPredecessorHelper.
18220   SmallPtrSet<const SDNode *, 32> Visited;
18221   SmallVector<const SDNode *, 16> Worklist;
18222   Worklist.push_back(N);
18223
18224 // If the offset is a constant, there may be other adds of constants that
18225 // can be folded with this one. We should do this to avoid having to keep
18226 // a copy of the original base pointer.
18227 SmallVector<SDNode *, 16> OtherUses;
18228 constexpr unsigned int MaxSteps = 8192;
18229 if (isa<ConstantSDNode>(Offset))
18230 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18231 UE = BasePtr->use_end();
18232 UI != UE; ++UI) {
18233 SDUse &Use = UI.getUse();
18234 // Skip the use that is Ptr and uses of other results from BasePtr's
18235 // node (important for nodes that return multiple results).
18236 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18237 continue;
18238
18239 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18240 MaxSteps))
18241 continue;
18242
18243 if (Use.getUser()->getOpcode() != ISD::ADD &&
18244 Use.getUser()->getOpcode() != ISD::SUB) {
18245 OtherUses.clear();
18246 break;
18247 }
18248
18249 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18250 if (!isa<ConstantSDNode>(Op1)) {
18251 OtherUses.clear();
18252 break;
18253 }
18254
18255 // FIXME: In some cases, we can be smarter about this.
18256 if (Op1.getValueType() != Offset.getValueType()) {
18257 OtherUses.clear();
18258 break;
18259 }
18260
18261 OtherUses.push_back(Use.getUser());
18262 }
18263
18264 if (Swapped)
18265 std::swap(BasePtr, Offset);
18266
18267 // Now check for #3 and #4.
18268 bool RealUse = false;
18269
18270 for (SDNode *Use : Ptr->uses()) {
18271 if (Use == N)
18272 continue;
18273 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18274 return false;
18275
18276     // This use only counts as a real use if Ptr can't instead be folded into
18277     // its addressing mode; otherwise the transformation isn't profitable.
18278 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18279 RealUse = true;
18280 }
18281
18282 if (!RealUse)
18283 return false;
18284
18285   SDValue Result;
18286   if (!IsMasked) {
18287 if (IsLoad)
18288 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18289 else
18290 Result =
18291 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18292 } else {
18293 if (IsLoad)
18294 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18295 Offset, AM);
18296 else
18297 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18298 Offset, AM);
18299 }
18300 ++PreIndexedNodes;
18301 ++NodesCombined;
18302 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18303 Result.dump(&DAG); dbgs() << '\n');
18304 WorklistRemover DeadNodes(*this);
18305 if (IsLoad) {
18306 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18307 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18308 } else {
18309 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18310 }
18311
18312 // Finally, since the node is now dead, remove it from the graph.
18313 deleteAndRecombine(N);
18314
18315 if (Swapped)
18316 std::swap(BasePtr, Offset);
18317
18318 // Replace other uses of BasePtr that can be updated to use Ptr
18319 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18320 unsigned OffsetIdx = 1;
18321 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18322 OffsetIdx = 0;
18323 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18324 BasePtr.getNode() && "Expected BasePtr operand");
18325
18326 // We need to replace ptr0 in the following expression:
18327 // x0 * offset0 + y0 * ptr0 = t0
18328 // knowing that
18329 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18330 //
18331 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18332 // indexed load/store and the expression that needs to be re-written.
18333 //
18334 // Therefore, we have:
18335 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
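    // For example, if the indexed operation is a pre-increment by 4
    // (t1 = ptr + 4) and the other use is t0 = ptr + 10, then
    // x0 = x1 = y0 = y1 = 1 and the rewritten expression is t0 = t1 + 6,
    // since ptr + 10 == (ptr + 4) + 6.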
18336
18337 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18338 const APInt &Offset0 = CN->getAPIntValue();
18339 const APInt &Offset1 = Offset->getAsAPIntVal();
18340 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18341 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18342 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18343 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18344
18345 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18346
18347 APInt CNV = Offset0;
18348 if (X0 < 0) CNV = -CNV;
18349 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18350 else CNV = CNV - Offset1;
18351
18352 SDLoc DL(OtherUses[i]);
18353
18354 // We can now generate the new expression.
18355 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18356 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18357
18358 SDValue NewUse = DAG.getNode(Opcode,
18359 DL,
18360 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18361 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18362 deleteAndRecombine(OtherUses[i]);
18363 }
18364
18365 // Replace the uses of Ptr with uses of the updated base value.
18366 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18367 deleteAndRecombine(Ptr.getNode());
18368 AddToWorklist(Result.getNode());
18369
18370 return true;
18371}
18372
18373static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18374                                    SDValue &BasePtr, SDValue &Offset,
18375                                    ISD::MemIndexedMode &AM,
18376                                    SelectionDAG &DAG,
18377 const TargetLowering &TLI) {
18378 if (PtrUse == N ||
18379 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18380 return false;
18381
18382 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18383 return false;
18384
18385   // Don't create an indexed load / store with zero offset.
18386   if (isNullConstant(Offset))
18387     return false;
18388
18389 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18390 return false;
18391
18392   SmallPtrSet<const SDNode *, 32> Visited;
18393   for (SDNode *Use : BasePtr->uses()) {
18394 if (Use == Ptr.getNode())
18395 continue;
18396
18397     // Reject this if there's a later user which could perform the indexing instead.
18398 if (isa<MemSDNode>(Use)) {
18399 bool IsLoad = true;
18400 bool IsMasked = false;
18401 SDValue OtherPtr;
18402       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18403                                    IsMasked, OtherPtr, TLI)) {
18404         SmallVector<const SDNode *, 2> Worklist;
18405         Worklist.push_back(Use);
18406 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18407 return false;
18408 }
18409 }
18410
18411 // If all the uses are load / store addresses, then don't do the
18412 // transformation.
18413 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18414 for (SDNode *UseUse : Use->uses())
18415 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18416 return false;
18417 }
18418 }
18419 return true;
18420}
18421
18422static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18423                                          bool &IsMasked, SDValue &Ptr,
18424                                          SDValue &BasePtr, SDValue &Offset,
18425                                          ISD::MemIndexedMode &AM,
18426                                          SelectionDAG &DAG,
18427                                          const TargetLowering &TLI) {
18428   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18429                                 IsMasked, Ptr, TLI) ||
18430 Ptr->hasOneUse())
18431 return nullptr;
18432
18433 // Try turning it into a post-indexed load / store except when
18434 // 1) All uses are load / store ops that use it as base ptr (and
18435   //    it may be folded as an addressing mode).
18436 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18437 // nor a successor of N. Otherwise, if Op is folded that would
18438 // create a cycle.
18439 for (SDNode *Op : Ptr->uses()) {
18440 // Check for #1.
18441 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18442 continue;
18443
18444 // Check for #2.
18445     SmallPtrSet<const SDNode *, 32> Visited;
18446     SmallVector<const SDNode *, 8> Worklist;
18447     constexpr unsigned int MaxSteps = 8192;
18448 // Ptr is predecessor to both N and Op.
18449 Visited.insert(Ptr.getNode());
18450 Worklist.push_back(N);
18451 Worklist.push_back(Op);
18452 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18453 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18454 return Op;
18455 }
18456 return nullptr;
18457}
18458
18459/// Try to combine a load/store with an add/sub of the base pointer node into
18460/// a post-indexed load/store. The transformation effectively folds the
18461/// add/subtract into the new indexed load/store, and all of its uses are
18462/// redirected to the new load/store.
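/// For illustration (a rough sketch in pseudo-notation, assuming a target with
/// post-indexed addressing):
///   v = load x
///   y = add x, 8
/// becomes
///   v, y = load.post x, 8     ; load from x, then produce y = x + 8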
18463bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18464 if (Level < AfterLegalizeDAG)
18465 return false;
18466
18467 bool IsLoad = true;
18468 bool IsMasked = false;
18469 SDValue Ptr;
18470   SDValue BasePtr;
18471   SDValue Offset;
18472   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18473   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18474 Offset, AM, DAG, TLI);
18475 if (!Op)
18476 return false;
18477
18478   SDValue Result;
18479   if (!IsMasked)
18480 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18481 Offset, AM)
18482 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18483 BasePtr, Offset, AM);
18484 else
18485 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18486 BasePtr, Offset, AM)
18487                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18488                                                  BasePtr, Offset, AM);
18489 ++PostIndexedNodes;
18490 ++NodesCombined;
18491 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18492 Result.dump(&DAG); dbgs() << '\n');
18493 WorklistRemover DeadNodes(*this);
18494 if (IsLoad) {
18495 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18496 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18497 } else {
18498 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18499 }
18500
18501 // Finally, since the node is now dead, remove it from the graph.
18502 deleteAndRecombine(N);
18503
18504 // Replace the uses of Use with uses of the updated base value.
18505   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18506                                 Result.getValue(IsLoad ? 1 : 0));
18507 deleteAndRecombine(Op);
18508 return true;
18509}
18510
18511/// Return the base-pointer arithmetic from an indexed \p LD.
18512SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18513 ISD::MemIndexedMode AM = LD->getAddressingMode();
18514 assert(AM != ISD::UNINDEXED);
18515 SDValue BP = LD->getOperand(1);
18516 SDValue Inc = LD->getOperand(2);
18517
18518 // Some backends use TargetConstants for load offsets, but don't expect
18519 // TargetConstants in general ADD nodes. We can convert these constants into
18520 // regular Constants (if the constant is not opaque).
18521   assert((Inc.getOpcode() != ISD::TargetConstant ||
18522           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18523 "Cannot split out indexing using opaque target constants");
18524 if (Inc.getOpcode() == ISD::TargetConstant) {
18525 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18526 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18527 ConstInc->getValueType(0));
18528 }
18529
18530 unsigned Opc =
18531 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18532 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18533}
18534
18535static inline ElementCount numVectorEltsOrZero(EVT T) {
18536   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18537}
18538
18539bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18540 EVT STType = Val.getValueType();
18541 EVT STMemType = ST->getMemoryVT();
18542 if (STType == STMemType)
18543 return true;
18544 if (isTypeLegal(STMemType))
18545 return false; // fail.
18546 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18547 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18548 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18549 return true;
18550 }
18551 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18552 STType.isInteger() && STMemType.isInteger()) {
18553 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18554 return true;
18555 }
18556 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18557 Val = DAG.getBitcast(STMemType, Val);
18558 return true;
18559 }
18560 return false; // fail.
18561}
18562
18563bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18564 EVT LDMemType = LD->getMemoryVT();
18565 EVT LDType = LD->getValueType(0);
18566 assert(Val.getValueType() == LDMemType &&
18567 "Attempting to extend value of non-matching type");
18568 if (LDType == LDMemType)
18569 return true;
18570 if (LDMemType.isInteger() && LDType.isInteger()) {
18571 switch (LD->getExtensionType()) {
18572 case ISD::NON_EXTLOAD:
18573 Val = DAG.getBitcast(LDType, Val);
18574 return true;
18575 case ISD::EXTLOAD:
18576 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18577 return true;
18578 case ISD::SEXTLOAD:
18579 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18580 return true;
18581 case ISD::ZEXTLOAD:
18582 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18583 return true;
18584 }
18585 }
18586 return false;
18587}
18588
18589StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18590 int64_t &Offset) {
18591 SDValue Chain = LD->getOperand(0);
18592
18593 // Look through CALLSEQ_START.
18594 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18595 Chain = Chain->getOperand(0);
18596
18597 StoreSDNode *ST = nullptr;
18598   SmallVector<SDValue, 8> Aliases;
18599   if (Chain.getOpcode() == ISD::TokenFactor) {
18600 // Look for unique store within the TokenFactor.
18601 for (SDValue Op : Chain->ops()) {
18602 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18603 if (!Store)
18604 continue;
18605 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18606 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18607 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18608 continue;
18609 // Make sure the store is not aliased with any nodes in TokenFactor.
18610 GatherAllAliases(Store, Chain, Aliases);
18611 if (Aliases.empty() ||
18612 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18613 ST = Store;
18614 break;
18615 }
18616 } else {
18617 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18618 if (Store) {
18619 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18620 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18621 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18622 ST = Store;
18623 }
18624 }
18625
18626 return ST;
18627}
18628
18629SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18630 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18631 return SDValue();
18632 SDValue Chain = LD->getOperand(0);
18633 int64_t Offset;
18634
18635 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18636 // TODO: Relax this restriction for unordered atomics (see D66309)
18637 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18638 return SDValue();
18639
18640 EVT LDType = LD->getValueType(0);
18641 EVT LDMemType = LD->getMemoryVT();
18642 EVT STMemType = ST->getMemoryVT();
18643 EVT STType = ST->getValue().getValueType();
18644
18645 // There are two cases to consider here:
18646 // 1. The store is fixed width and the load is scalable. In this case we
18647 // don't know at compile time if the store completely envelops the load
18648 // so we abandon the optimisation.
18649 // 2. The store is scalable and the load is fixed width. We could
18650 // potentially support a limited number of cases here, but there has been
18651 // no cost-benefit analysis to prove it's worth it.
18652 bool LdStScalable = LDMemType.isScalableVT();
18653 if (LdStScalable != STMemType.isScalableVT())
18654 return SDValue();
18655
18656 // If we are dealing with scalable vectors on a big endian platform the
18657 // calculation of offsets below becomes trickier, since we do not know at
18658 // compile time the absolute size of the vector. Until we've done more
18659 // analysis on big-endian platforms it seems better to bail out for now.
18660 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18661 return SDValue();
18662
18663   // Normalize for endianness. After this, Offset == 0 will denote that the
18664   // least significant bit in the loaded value maps to the least significant
18665   // bit in the stored value. With Offset == n (for n > 0) the loaded value
18666   // starts at the n:th least significant byte of the stored value.
18667 int64_t OrigOffset = Offset;
18668 if (DAG.getDataLayout().isBigEndian())
18669 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18670 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18671 8 -
18672 Offset;
18673
18674   // Check that the stored value covers all bits that are loaded.
18675 bool STCoversLD;
18676
18677 TypeSize LdMemSize = LDMemType.getSizeInBits();
18678 TypeSize StMemSize = STMemType.getSizeInBits();
18679 if (LdStScalable)
18680 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18681 else
18682 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18683 StMemSize.getFixedValue());
18684
18685 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18686 if (LD->isIndexed()) {
18687 // Cannot handle opaque target constants and we must respect the user's
18688 // request not to split indexes from loads.
18689 if (!canSplitIdx(LD))
18690 return SDValue();
18691 SDValue Idx = SplitIndexingFromLoad(LD);
18692 SDValue Ops[] = {Val, Idx, Chain};
18693 return CombineTo(LD, Ops, 3);
18694 }
18695 return CombineTo(LD, Val, Chain);
18696 };
18697
18698 if (!STCoversLD)
18699 return SDValue();
18700
18701 // Memory as copy space (potentially masked).
18702 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18703 // Simple case: Direct non-truncating forwarding
18704 if (LDType.getSizeInBits() == LdMemSize)
18705 return ReplaceLd(LD, ST->getValue(), Chain);
18706 // Can we model the truncate and extension with an and mask?
18707 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18708 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18709 // Mask to size of LDMemType
18710 auto Mask =
18711           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18712                                                StMemSize.getFixedValue()),
18713 SDLoc(ST), STType);
18714 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18715 return ReplaceLd(LD, Val, Chain);
18716 }
18717 }
18718
18719   // Handle some big-endian cases that would have Offset == 0 (and thus be
18720   // handled above) on a little-endian target.
18721 SDValue Val = ST->getValue();
18722 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18723 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18724 !LDType.isVector() && isTypeLegal(STType) &&
18725 TLI.isOperationLegal(ISD::SRL, STType)) {
18726 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18727 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18728 Offset = 0;
18729 }
18730 }
18731
18732 // TODO: Deal with nonzero offset.
18733 if (LD->getBasePtr().isUndef() || Offset != 0)
18734 return SDValue();
18735   // Model necessary truncations / extensions.
18736 // Truncate Value To Stored Memory Size.
18737 do {
18738 if (!getTruncatedStoreValue(ST, Val))
18739 continue;
18740 if (!isTypeLegal(LDMemType))
18741 continue;
18742 if (STMemType != LDMemType) {
18743 // TODO: Support vectors? This requires extract_subvector/bitcast.
18744 if (!STMemType.isVector() && !LDMemType.isVector() &&
18745 STMemType.isInteger() && LDMemType.isInteger())
18746 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18747 else
18748 continue;
18749 }
18750 if (!extendLoadedValueToExtension(LD, Val))
18751 continue;
18752 return ReplaceLd(LD, Val, Chain);
18753 } while (false);
18754
18755 // On failure, cleanup dead nodes we may have created.
18756 if (Val->use_empty())
18757 deleteAndRecombine(Val.getNode());
18758 return SDValue();
18759}
18760
18761SDValue DAGCombiner::visitLOAD(SDNode *N) {
18762 LoadSDNode *LD = cast<LoadSDNode>(N);
18763 SDValue Chain = LD->getChain();
18764 SDValue Ptr = LD->getBasePtr();
18765
18766 // If load is not volatile and there are no uses of the loaded value (and
18767 // the updated indexed value in case of indexed loads), change uses of the
18768 // chain value into uses of the chain input (i.e. delete the dead load).
18769 // TODO: Allow this for unordered atomics (see D66309)
18770 if (LD->isSimple()) {
18771 if (N->getValueType(1) == MVT::Other) {
18772 // Unindexed loads.
18773 if (!N->hasAnyUseOfValue(0)) {
18774 // It's not safe to use the two value CombineTo variant here. e.g.
18775 // v1, chain2 = load chain1, loc
18776 // v2, chain3 = load chain2, loc
18777 // v3 = add v2, c
18778 // Now we replace use of chain2 with chain1. This makes the second load
18779 // isomorphic to the one we are deleting, and thus makes this load live.
18780 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18781 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18782 dbgs() << "\n");
18783 WorklistRemover DeadNodes(*this);
18784 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18785 AddUsersToWorklist(Chain.getNode());
18786 if (N->use_empty())
18787 deleteAndRecombine(N);
18788
18789 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18790 }
18791 } else {
18792 // Indexed loads.
18793 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18794
18795 // If this load has an opaque TargetConstant offset, then we cannot split
18796 // the indexing into an add/sub directly (that TargetConstant may not be
18797 // valid for a different type of node, and we cannot convert an opaque
18798 // target constant into a regular constant).
18799 bool CanSplitIdx = canSplitIdx(LD);
18800
18801 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18802 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18803 SDValue Index;
18804 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18805 Index = SplitIndexingFromLoad(LD);
18806 // Try to fold the base pointer arithmetic into subsequent loads and
18807 // stores.
18808 AddUsersToWorklist(N);
18809 } else
18810 Index = DAG.getUNDEF(N->getValueType(1));
18811 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18812 dbgs() << "\nWith: "; Undef.dump(&DAG);
18813 dbgs() << " and 2 other values\n");
18814 WorklistRemover DeadNodes(*this);
18815 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18816         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18817         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18818 deleteAndRecombine(N);
18819 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18820 }
18821 }
18822 }
18823
18824 // If this load is directly stored, replace the load value with the stored
18825 // value.
18826 if (auto V = ForwardStoreValueToDirectLoad(LD))
18827 return V;
18828
18829 // Try to infer better alignment information than the load already has.
18830 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18831 !LD->isAtomic()) {
18832 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18833 if (*Alignment > LD->getAlign() &&
18834 isAligned(*Alignment, LD->getSrcValueOffset())) {
18835 SDValue NewLoad = DAG.getExtLoad(
18836 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18837 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18838 LD->getMemOperand()->getFlags(), LD->getAAInfo());
18839 // NewLoad will always be N as we are only refining the alignment
18840 assert(NewLoad.getNode() == N);
18841 (void)NewLoad;
18842 }
18843 }
18844 }
18845
18846 if (LD->isUnindexed()) {
18847 // Walk up chain skipping non-aliasing memory nodes.
18848 SDValue BetterChain = FindBetterChain(LD, Chain);
18849
18850 // If there is a better chain.
18851 if (Chain != BetterChain) {
18852 SDValue ReplLoad;
18853
18854       // Replace the chain to avoid the dependency.
18855 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18856 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18857 BetterChain, Ptr, LD->getMemOperand());
18858 } else {
18859 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18860 LD->getValueType(0),
18861 BetterChain, Ptr, LD->getMemoryVT(),
18862 LD->getMemOperand());
18863 }
18864
18865 // Create token factor to keep old chain connected.
18866 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18867 MVT::Other, Chain, ReplLoad.getValue(1));
18868
18869 // Replace uses with load result and token factor
18870 return CombineTo(N, ReplLoad.getValue(0), Token);
18871 }
18872 }
18873
18874 // Try transforming N to an indexed load.
18875 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18876 return SDValue(N, 0);
18877
18878 // Try to slice up N to more direct loads if the slices are mapped to
18879 // different register banks or pairing can take place.
18880 if (SliceUpLoad(N))
18881 return SDValue(N, 0);
18882
18883 return SDValue();
18884}
18885
18886namespace {
18887
18888/// Helper structure used to slice a load in smaller loads.
18889/// Basically a slice is obtained from the following sequence:
18890/// Origin = load Ty1, Base
18891/// Shift = srl Ty1 Origin, CstTy Amount
18892/// Inst = trunc Shift to Ty2
18893///
18894/// Then, it will be rewritten into:
18895/// Slice = load SliceTy, Base + SliceOffset
18896/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
18897///
18898/// SliceTy is deduced from the number of bits that are actually used to
18899/// build Inst.
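/// For illustration (assuming a little-endian target):
///   Origin = load i64, Base
///   Shift  = srl i64 Origin, 32
///   Inst   = trunc Shift to i32
/// is rewritten into
///   Slice  = load i32, Base + 4
/// and no zext is needed because SliceTy matches Ty2.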
18900struct LoadedSlice {
18901 /// Helper structure used to compute the cost of a slice.
18902 struct Cost {
18903 /// Are we optimizing for code size.
18904 bool ForCodeSize = false;
18905
18906 /// Various cost.
18907 unsigned Loads = 0;
18908 unsigned Truncates = 0;
18909 unsigned CrossRegisterBanksCopies = 0;
18910 unsigned ZExts = 0;
18911 unsigned Shift = 0;
18912
18913 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18914
18915 /// Get the cost of one isolated slice.
18916 Cost(const LoadedSlice &LS, bool ForCodeSize)
18917 : ForCodeSize(ForCodeSize), Loads(1) {
18918 EVT TruncType = LS.Inst->getValueType(0);
18919 EVT LoadedType = LS.getLoadedType();
18920 if (TruncType != LoadedType &&
18921 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18922 ZExts = 1;
18923 }
18924
18925     /// Account for slicing gain in the current cost.
18926     /// Slicing provides a few gains, like removing a shift or a
18927     /// truncate. This method allows growing the cost of the original
18928     /// load by the gain from this slice.
18929 void addSliceGain(const LoadedSlice &LS) {
18930 // Each slice saves a truncate.
18931 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18932 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18933 ++Truncates;
18934 // If there is a shift amount, this slice gets rid of it.
18935 if (LS.Shift)
18936 ++Shift;
18937 // If this slice can merge a cross register bank copy, account for it.
18938 if (LS.canMergeExpensiveCrossRegisterBankCopy())
18939 ++CrossRegisterBanksCopies;
18940 }
18941
18942 Cost &operator+=(const Cost &RHS) {
18943 Loads += RHS.Loads;
18944 Truncates += RHS.Truncates;
18945 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18946 ZExts += RHS.ZExts;
18947 Shift += RHS.Shift;
18948 return *this;
18949 }
18950
18951 bool operator==(const Cost &RHS) const {
18952 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18953 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18954 ZExts == RHS.ZExts && Shift == RHS.Shift;
18955 }
18956
18957 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
18958
18959 bool operator<(const Cost &RHS) const {
18960 // Assume cross register banks copies are as expensive as loads.
18961 // FIXME: Do we want some more target hooks?
18962 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
18963 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
18964 // Unless we are optimizing for code size, consider the
18965 // expensive operation first.
18966 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
18967 return ExpensiveOpsLHS < ExpensiveOpsRHS;
18968 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
18969 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
18970 }
18971
18972 bool operator>(const Cost &RHS) const { return RHS < *this; }
18973
18974 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
18975
18976 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
18977 };
18978
18979   // The last instruction that represents the slice. This should be a
18980   // truncate instruction.
18981 SDNode *Inst;
18982
18983 // The original load instruction.
18984 LoadSDNode *Origin;
18985
18986 // The right shift amount in bits from the original load.
18987 unsigned Shift;
18988
18989   // The DAG from which Origin came.
18990 // This is used to get some contextual information about legal types, etc.
18991 SelectionDAG *DAG;
18992
18993 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
18994 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
18995 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
18996
18997   /// Get the bits used in a chunk of bits \p BitWidth large.
18998   /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
18999   ///         unused bits set to 0.
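  /// For example, an i8 slice obtained via trunc(lshr(Origin, 16)) from an
  /// i64 load yields used bits 0x0000000000FF0000.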
19000 APInt getUsedBits() const {
19001 // Reproduce the trunc(lshr) sequence:
19002 // - Start from the truncated value.
19003 // - Zero extend to the desired bit width.
19004 // - Shift left.
19005 assert(Origin && "No original load to compare against.");
19006 unsigned BitWidth = Origin->getValueSizeInBits(0);
19007 assert(Inst && "This slice is not bound to an instruction");
19008 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19009 "Extracted slice is bigger than the whole type!");
19010 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19011 UsedBits.setAllBits();
19012 UsedBits = UsedBits.zext(BitWidth);
19013 UsedBits <<= Shift;
19014 return UsedBits;
19015 }
19016
19017 /// Get the size of the slice to be loaded in bytes.
19018 unsigned getLoadedSize() const {
19019 unsigned SliceSize = getUsedBits().popcount();
19020 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19021 return SliceSize / 8;
19022 }
19023
19024 /// Get the type that will be loaded for this slice.
19025 /// Note: This may not be the final type for the slice.
19026 EVT getLoadedType() const {
19027 assert(DAG && "Missing context");
19028 LLVMContext &Ctxt = *DAG->getContext();
19029 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19030 }
19031
19032 /// Get the alignment of the load used for this slice.
19033 Align getAlign() const {
19034 Align Alignment = Origin->getAlign();
19035 uint64_t Offset = getOffsetFromBase();
19036 if (Offset != 0)
19037 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19038 return Alignment;
19039 }
19040
19041 /// Check if this slice can be rewritten with legal operations.
19042 bool isLegal() const {
19043 // An invalid slice is not legal.
19044 if (!Origin || !Inst || !DAG)
19045 return false;
19046
19047 // Offsets are for indexed load only, we do not handle that.
19048 if (!Origin->getOffset().isUndef())
19049 return false;
19050
19051 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19052
19053 // Check that the type is legal.
19054 EVT SliceType = getLoadedType();
19055 if (!TLI.isTypeLegal(SliceType))
19056 return false;
19057
19058 // Check that the load is legal for this type.
19059 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19060 return false;
19061
19062 // Check that the offset can be computed.
19063 // 1. Check its type.
19064 EVT PtrType = Origin->getBasePtr().getValueType();
19065 if (PtrType == MVT::Untyped || PtrType.isExtended())
19066 return false;
19067
19068 // 2. Check that it fits in the immediate.
19069 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19070 return false;
19071
19072 // 3. Check that the computation is legal.
19073 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19074 return false;
19075
19076 // Check that the zext is legal if it needs one.
19077 EVT TruncateType = Inst->getValueType(0);
19078 if (TruncateType != SliceType &&
19079 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19080 return false;
19081
19082 return true;
19083 }
19084
19085 /// Get the offset in bytes of this slice in the original chunk of
19086 /// bits.
19087 /// \pre DAG != nullptr.
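  /// For example, a 2-byte slice of an i64 load with Shift == 16 starts at
  /// byte offset 2 on a little-endian target and at offset 4 on a big-endian
  /// one.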
19088 uint64_t getOffsetFromBase() const {
19089 assert(DAG && "Missing context.");
19090 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19091 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19092 uint64_t Offset = Shift / 8;
19093 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19094 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19095 "The size of the original loaded type is not a multiple of a"
19096 " byte.");
19097 // If Offset is bigger than TySizeInBytes, it means we are loading all
19098 // zeros. This should have been optimized before in the process.
19099 assert(TySizeInBytes > Offset &&
19100 "Invalid shift amount for given loaded size");
19101 if (IsBigEndian)
19102 Offset = TySizeInBytes - Offset - getLoadedSize();
19103 return Offset;
19104 }
19105
19106 /// Generate the sequence of instructions to load the slice
19107 /// represented by this object and redirect the uses of this slice to
19108 /// this new sequence of instructions.
19109 /// \pre this->Inst && this->Origin are valid Instructions and this
19110 /// object passed the legal check: LoadedSlice::isLegal returned true.
19111 /// \return The last instruction of the sequence used to load the slice.
19112 SDValue loadSlice() const {
19113 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19114 const SDValue &OldBaseAddr = Origin->getBasePtr();
19115 SDValue BaseAddr = OldBaseAddr;
19116 // Get the offset in that chunk of bytes w.r.t. the endianness.
19117 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19118 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19119 if (Offset) {
19120 // BaseAddr = BaseAddr + Offset.
19121 EVT ArithType = BaseAddr.getValueType();
19122 SDLoc DL(Origin);
19123 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19124 DAG->getConstant(Offset, DL, ArithType));
19125 }
19126
19127 // Create the type of the loaded slice according to its size.
19128 EVT SliceType = getLoadedType();
19129
19130 // Create the load for the slice.
19131 SDValue LastInst =
19132 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19133                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19134                      Origin->getMemOperand()->getFlags());
19135 // If the final type is not the same as the loaded type, this means that
19136 // we have to pad with zero. Create a zero extend for that.
19137 EVT FinalType = Inst->getValueType(0);
19138 if (SliceType != FinalType)
19139 LastInst =
19140 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19141 return LastInst;
19142 }
19143
19144 /// Check if this slice can be merged with an expensive cross register
19145 /// bank copy. E.g.,
19146 /// i = load i32
19147 /// f = bitcast i32 i to float
19148 bool canMergeExpensiveCrossRegisterBankCopy() const {
19149 if (!Inst || !Inst->hasOneUse())
19150 return false;
19151 SDNode *Use = *Inst->use_begin();
19152 if (Use->getOpcode() != ISD::BITCAST)
19153 return false;
19154 assert(DAG && "Missing context");
19155 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19156 EVT ResVT = Use->getValueType(0);
19157 const TargetRegisterClass *ResRC =
19158 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19159 const TargetRegisterClass *ArgRC =
19160 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19161 Use->getOperand(0)->isDivergent());
19162 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19163 return false;
19164
19165 // At this point, we know that we perform a cross-register-bank copy.
19166 // Check if it is expensive.
19167     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19168     // Assume bitcasts are cheap, unless both register classes do not
19169 // explicitly share a common sub class.
19170 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19171 return false;
19172
19173 // Check if it will be merged with the load.
19174 // 1. Check the alignment / fast memory access constraint.
19175 unsigned IsFast = 0;
19176 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19177 Origin->getAddressSpace(), getAlign(),
19178 Origin->getMemOperand()->getFlags(), &IsFast) ||
19179 !IsFast)
19180 return false;
19181
19182 // 2. Check that the load is a legal operation for that type.
19183 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19184 return false;
19185
19186 // 3. Check that we do not have a zext in the way.
19187 if (Inst->getValueType(0) != getLoadedType())
19188 return false;
19189
19190 return true;
19191 }
19192};
19193
19194} // end anonymous namespace
19195
19196/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19197/// \p UsedBits looks like 0..0 1..1 0..0.
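/// For example, 0x00FFFF00 is dense while 0x00FF00FF is not.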
19198static bool areUsedBitsDense(const APInt &UsedBits) {
19199 // If all the bits are one, this is dense!
19200 if (UsedBits.isAllOnes())
19201 return true;
19202
19203 // Get rid of the unused bits on the right.
19204 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19205 // Get rid of the unused bits on the left.
19206 if (NarrowedUsedBits.countl_zero())
19207 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19208 // Check that the chunk of bits is completely used.
19209 return NarrowedUsedBits.isAllOnes();
19210}
19211
19212/// Check whether or not \p First and \p Second are next to each other
19213/// in memory. This means that there is no hole between the bits loaded
19214/// by \p First and the bits loaded by \p Second.
19215static bool areSlicesNextToEachOther(const LoadedSlice &First,
19216 const LoadedSlice &Second) {
19217 assert(First.Origin == Second.Origin && First.Origin &&
19218 "Unable to match different memory origins.");
19219 APInt UsedBits = First.getUsedBits();
19220 assert((UsedBits & Second.getUsedBits()) == 0 &&
19221 "Slices are not supposed to overlap.");
19222 UsedBits |= Second.getUsedBits();
19223 return areUsedBitsDense(UsedBits);
19224}
19225
19226/// Adjust the \p GlobalLSCost according to the target
19227/// pairing capabilities and the layout of the slices.
19228/// \pre \p GlobalLSCost should account for at least as many loads as
19229/// there are in the slices in \p LoadedSlices.
19230static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19231                                 LoadedSlice::Cost &GlobalLSCost) {
19232 unsigned NumberOfSlices = LoadedSlices.size();
19233   // If there are fewer than 2 elements, no pairing is possible.
19234 if (NumberOfSlices < 2)
19235 return;
19236
19237 // Sort the slices so that elements that are likely to be next to each
19238 // other in memory are next to each other in the list.
19239 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19240 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19241 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19242 });
19243 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19244   // First (resp. Second) is the first (resp. second) potential candidate
19245   // to be placed in a paired load.
19246 const LoadedSlice *First = nullptr;
19247 const LoadedSlice *Second = nullptr;
19248 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19249 // Set the beginning of the pair.
19250 First = Second) {
19251 Second = &LoadedSlices[CurrSlice];
19252
19253 // If First is NULL, it means we start a new pair.
19254 // Get to the next slice.
19255 if (!First)
19256 continue;
19257
19258 EVT LoadedType = First->getLoadedType();
19259
19260 // If the types of the slices are different, we cannot pair them.
19261 if (LoadedType != Second->getLoadedType())
19262 continue;
19263
19264 // Check if the target supplies paired loads for this type.
19265 Align RequiredAlignment;
19266 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19267 // move to the next pair, this type is hopeless.
19268 Second = nullptr;
19269 continue;
19270 }
19271 // Check if we meet the alignment requirement.
19272 if (First->getAlign() < RequiredAlignment)
19273 continue;
19274
19275 // Check that both loads are next to each other in memory.
19276 if (!areSlicesNextToEachOther(*First, *Second))
19277 continue;
19278
19279 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19280 --GlobalLSCost.Loads;
19281 // Move to the next pair.
19282 Second = nullptr;
19283 }
19284}
19285
19286/// Check the profitability of all involved LoadedSlice.
19287/// Currently, it is considered profitable if there are exactly two
19288/// involved slices (1) which are (2) next to each other in memory, and
19289/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19290///
19291/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19292/// the elements themselves.
19293///
19294/// FIXME: When the cost model will be mature enough, we can relax
19295/// constraints (1) and (2).
19296static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19297                                 const APInt &UsedBits, bool ForCodeSize) {
19298 unsigned NumberOfSlices = LoadedSlices.size();
19299   if (StressLoadSlicing)
19300     return NumberOfSlices > 1;
19301
19302 // Check (1).
19303 if (NumberOfSlices != 2)
19304 return false;
19305
19306 // Check (2).
19307 if (!areUsedBitsDense(UsedBits))
19308 return false;
19309
19310 // Check (3).
19311 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19312 // The original code has one big load.
19313 OrigCost.Loads = 1;
19314 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19315 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19316 // Accumulate the cost of all the slices.
19317 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19318 GlobalSlicingCost += SliceCost;
19319
19320 // Account as cost in the original configuration the gain obtained
19321 // with the current slices.
19322 OrigCost.addSliceGain(LS);
19323 }
19324
19325 // If the target supports paired load, adjust the cost accordingly.
19326 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19327 return OrigCost > GlobalSlicingCost;
19328}
19329
19330/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19331/// operations, split it in the various pieces being extracted.
19332///
19333/// This sort of thing is introduced by SROA.
19334/// This slicing takes care not to insert overlapping loads.
19335/// \pre LI is a simple load (i.e., not an atomic or volatile load).
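/// For illustration (a sketch, assuming a little-endian target):
///   %w  = load i32, %p
///   %lo = trunc i32 %w to i16
///   %hi = trunc i32 (lshr i32 %w, 16) to i16
/// may be sliced into two independent i16 loads, at %p and at %p + 2.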
19336bool DAGCombiner::SliceUpLoad(SDNode *N) {
19337 if (Level < AfterLegalizeDAG)
19338 return false;
19339
19340 LoadSDNode *LD = cast<LoadSDNode>(N);
19341 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19342 !LD->getValueType(0).isInteger())
19343 return false;
19344
19345 // The algorithm to split up a load of a scalable vector into individual
19346 // elements currently requires knowing the length of the loaded type,
19347 // so will need adjusting to work on scalable vectors.
19348 if (LD->getValueType(0).isScalableVector())
19349 return false;
19350
19351 // Keep track of already used bits to detect overlapping values.
19352 // In that case, we will just abort the transformation.
19353 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19354
19355 SmallVector<LoadedSlice, 4> LoadedSlices;
19356
19357 // Check if this load is used as several smaller chunks of bits.
19358 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19359 // of computation for each trunc.
19360 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19361 UI != UIEnd; ++UI) {
19362 // Skip the uses of the chain.
19363 if (UI.getUse().getResNo() != 0)
19364 continue;
19365
19366 SDNode *User = *UI;
19367 unsigned Shift = 0;
19368
19369 // Check if this is a trunc(lshr).
19370 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19371 isa<ConstantSDNode>(User->getOperand(1))) {
19372 Shift = User->getConstantOperandVal(1);
19373 User = *User->use_begin();
19374 }
19375
19376     // At this point, User is a truncate iff we encountered trunc or
19377     // trunc(lshr).
19378 if (User->getOpcode() != ISD::TRUNCATE)
19379 return false;
19380
19381     // The width of the type must be a power of 2 and at least 8 bits.
19382     // Otherwise the load cannot be represented in LLVM IR.
19383     // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
19384     // the slice would span several bytes. We do not support that.
19385 unsigned Width = User->getValueSizeInBits(0);
19386 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19387 return false;
19388
19389 // Build the slice for this chain of computations.
19390 LoadedSlice LS(User, LD, Shift, &DAG);
19391 APInt CurrentUsedBits = LS.getUsedBits();
19392
19393 // Check if this slice overlaps with another.
19394 if ((CurrentUsedBits & UsedBits) != 0)
19395 return false;
19396 // Update the bits used globally.
19397 UsedBits |= CurrentUsedBits;
19398
19399 // Check if the new slice would be legal.
19400 if (!LS.isLegal())
19401 return false;
19402
19403 // Record the slice.
19404 LoadedSlices.push_back(LS);
19405 }
19406
19407 // Abort slicing if it does not seem to be profitable.
19408 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19409 return false;
19410
19411 ++SlicedLoads;
19412
19413 // Rewrite each chain to use an independent load.
19414 // By construction, each chain can be represented by a unique load.
19415
19416 // Prepare the argument for the new token factor for all the slices.
19417 SmallVector<SDValue, 8> ArgChains;
19418 for (const LoadedSlice &LS : LoadedSlices) {
19419 SDValue SliceInst = LS.loadSlice();
19420 CombineTo(LS.Inst, SliceInst, true);
19421 if (SliceInst.getOpcode() != ISD::LOAD)
19422 SliceInst = SliceInst.getOperand(0);
19423 assert(SliceInst->getOpcode() == ISD::LOAD &&
19424 "It takes more than a zext to get to the loaded slice!!");
19425 ArgChains.push_back(SliceInst.getValue(1));
19426 }
19427
19428 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19429 ArgChains);
19430 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19431 AddToWorklist(Chain.getNode());
19432 return true;
19433}
19434
19435/// Check to see if V is (and load (ptr), imm), where the load has specific
19436/// bytes cleared out. If so, return the byte size being masked out and the
19437/// shift amount.
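/// For example, V = (and (load i32 %p), 0xFFFF00FF) clears byte 1 of the
/// loaded value, so this returns {1, 1}: one masked byte at a one-byte shift
/// (assuming the chain check below also passes).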
19438static std::pair<unsigned, unsigned>
19439CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19440   std::pair<unsigned, unsigned> Result(0, 0);
19441
19442 // Check for the structure we're looking for.
19443 if (V->getOpcode() != ISD::AND ||
19444 !isa<ConstantSDNode>(V->getOperand(1)) ||
19445 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19446 return Result;
19447
19448 // Check the chain and pointer.
19449 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19450 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19451
19452 // This only handles simple types.
19453 if (V.getValueType() != MVT::i16 &&
19454 V.getValueType() != MVT::i32 &&
19455 V.getValueType() != MVT::i64)
19456 return Result;
19457
19458 // Check the constant mask. Invert it so that the bits being masked out are
19459 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
19460 // follow the sign bit for uniformity.
19461 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19462 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19463 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19464 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19465 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19466 if (NotMaskLZ == 64) return Result; // All zero mask.
19467
19468 // See if we have a continuous run of bits. If so, we have 0*1+0*
19469 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19470 return Result;
19471
19472 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19473 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19474 NotMaskLZ -= 64-V.getValueSizeInBits();
19475
19476 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19477 switch (MaskedBytes) {
19478 case 1:
19479 case 2:
19480 case 4: break;
19481 default: return Result; // All one mask, or 5-byte mask.
19482 }
19483
19484   // Verify that the masked region starts at a byte offset that is a multiple
19485   // of the access width, so the narrowed access stays aligned to its width.
19486 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19487
19488   // For narrowing to be valid, the load must be the memory operation that
19489   // immediately precedes the store.
19490 if (LD == Chain.getNode())
19491 ; // ok.
19492 else if (Chain->getOpcode() == ISD::TokenFactor &&
19493 SDValue(LD, 1).hasOneUse()) {
19494     // LD has only 1 chain use, so there are no indirect dependencies.
19495 if (!LD->isOperandOf(Chain.getNode()))
19496 return Result;
19497 } else
19498 return Result; // Fail.
19499
19500 Result.first = MaskedBytes;
19501 Result.second = NotMaskTZ/8;
19502 return Result;
19503}
19504
19505/// Check to see if IVal is something that provides a value as specified by
19506/// MaskInfo. If so, replace the specified store with a narrower store of
19507/// truncated IVal.
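/// For illustration (a sketch, assuming a little-endian target): with
/// MaskInfo = {1, 1}, a store of (or (and (load %p), 0xFFFF00FF), IVal) may
/// become a one-byte store of (trunc (srl IVal, 8)) at %p + 1, provided IVal
/// is known to be zero outside byte 1.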
19508static SDValue
19509ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19510 SDValue IVal, StoreSDNode *St,
19511 DAGCombiner *DC) {
19512 unsigned NumBytes = MaskInfo.first;
19513 unsigned ByteShift = MaskInfo.second;
19514 SelectionDAG &DAG = DC->getDAG();
19515
19516 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19517 // that uses this. If not, this is not a replacement.
19518 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19519 ByteShift*8, (ByteShift+NumBytes)*8);
19520 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19521
19522 // Check that it is legal on the target to do this. It is legal if the new
19523 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19524 // legalization. If the source type is legal, but the store type isn't, see
19525 // if we can use a truncating store.
19526 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19527 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19528 bool UseTruncStore;
19529 if (DC->isTypeLegal(VT))
19530 UseTruncStore = false;
19531 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19532 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19533 UseTruncStore = true;
19534 else
19535 return SDValue();
19536
19537 // Can't do this for indexed stores.
19538 if (St->isIndexed())
19539 return SDValue();
19540
19541 // Check that the target doesn't think this is a bad idea.
19542 if (St->getMemOperand() &&
19543 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19544 *St->getMemOperand()))
19545 return SDValue();
19546
19547 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19548 // shifted by ByteShift and truncated down to NumBytes.
19549 if (ByteShift) {
19550 SDLoc DL(IVal);
19551 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19552 DAG.getConstant(ByteShift*8, DL,
19553 DC->getShiftAmountTy(IVal.getValueType())));
19554 }
19555
19556 // Figure out the offset for the store and the alignment of the access.
19557 unsigned StOffset;
19558 if (DAG.getDataLayout().isLittleEndian())
19559 StOffset = ByteShift;
19560 else
19561 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19562
19563 SDValue Ptr = St->getBasePtr();
19564 if (StOffset) {
19565 SDLoc DL(IVal);
19566     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19567   }
19568
19569 ++OpsNarrowed;
19570 if (UseTruncStore)
19571 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19572 St->getPointerInfo().getWithOffset(StOffset),
19573 VT, St->getOriginalAlign());
19574
19575 // Truncate down to the new size.
19576 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19577
19578 return DAG
19579 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19580 St->getPointerInfo().getWithOffset(StOffset),
19581 St->getOriginalAlign());
19582}
19583
19584/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19585/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19586/// narrowing the load and store if it would end up being a win for performance
19587/// or code size.
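/// For illustration (a sketch, assuming a little-endian target):
///   store (or (load i32 %p), 0x00FF0000), %p
/// only touches byte 2 of the value, so it may be narrowed to an i8
/// load / or / store at %p + 2 when the target considers that profitable.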
19588SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19589 StoreSDNode *ST = cast<StoreSDNode>(N);
19590 if (!ST->isSimple())
19591 return SDValue();
19592
19593 SDValue Chain = ST->getChain();
19594 SDValue Value = ST->getValue();
19595 SDValue Ptr = ST->getBasePtr();
19596 EVT VT = Value.getValueType();
19597
19598 if (ST->isTruncatingStore() || VT.isVector())
19599 return SDValue();
19600
19601 unsigned Opc = Value.getOpcode();
19602
19603 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19604 !Value.hasOneUse())
19605 return SDValue();
19606
19607 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19608 // is a byte mask indicating a consecutive number of bytes, check to see if
19609 // Y is known to provide just those bytes. If so, we try to replace the
19610 // load + replace + store sequence with a single (narrower) store, which makes
19611 // the load dead.
19612   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19613     std::pair<unsigned, unsigned> MaskedLoad;
19614 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19615 if (MaskedLoad.first)
19616 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19617 Value.getOperand(1), ST,this))
19618 return NewST;
19619
19620 // Or is commutative, so try swapping X and Y.
19621 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19622 if (MaskedLoad.first)
19623 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19624 Value.getOperand(0), ST,this))
19625 return NewST;
19626 }
19627
19628   if (!EnableReduceLoadOpStoreWidth)
19629     return SDValue();
19630
19631 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19632 return SDValue();
19633
19634 SDValue N0 = Value.getOperand(0);
19635 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19636 Chain == SDValue(N0.getNode(), 1)) {
19637 LoadSDNode *LD = cast<LoadSDNode>(N0);
19638 if (LD->getBasePtr() != Ptr ||
19639 LD->getPointerInfo().getAddrSpace() !=
19640 ST->getPointerInfo().getAddrSpace())
19641 return SDValue();
19642
19643 // Find the type to narrow the load / op / store to.
19644 SDValue N1 = Value.getOperand(1);
19645 unsigned BitWidth = N1.getValueSizeInBits();
19646 APInt Imm = N1->getAsAPIntVal();
19647 if (Opc == ISD::AND)
19648 Imm ^= APInt::getAllOnes(BitWidth);
19649 if (Imm == 0 || Imm.isAllOnes())
19650 return SDValue();
19651 unsigned ShAmt = Imm.countr_zero();
19652 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19653 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19654 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19655 // The narrowing should be profitable, the load/store operation should be
19656 // legal (or custom) and the store size should be equal to the NewVT width.
19657 while (NewBW < BitWidth &&
19658 (NewVT.getStoreSizeInBits() != NewBW ||
19659 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19660 !TLI.isNarrowingProfitable(VT, NewVT))) {
19661 NewBW = NextPowerOf2(NewBW);
19662 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19663 }
19664 if (NewBW >= BitWidth)
19665 return SDValue();
19666
19667 // If the lsb that changed does not start at a type-bitwidth boundary,
19668 // start at the previous boundary.
19669 if (ShAmt % NewBW)
19670 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19671 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19672 std::min(BitWidth, ShAmt + NewBW));
19673 if ((Imm & Mask) == Imm) {
19674 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19675 if (Opc == ISD::AND)
19676 NewImm ^= APInt::getAllOnes(NewBW);
19677 uint64_t PtrOff = ShAmt / 8;
19678 // For big endian targets, we need to adjust the offset to the pointer to
19679 // load the correct bytes.
19680 if (DAG.getDataLayout().isBigEndian())
19681 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19682
19683 unsigned IsFast = 0;
19684 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19685 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19686 LD->getAddressSpace(), NewAlign,
19687 LD->getMemOperand()->getFlags(), &IsFast) ||
19688 !IsFast)
19689 return SDValue();
19690
19691 SDValue NewPtr =
19692 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19693 SDValue NewLD =
19694 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19695 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19696 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19697 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19698 DAG.getConstant(NewImm, SDLoc(Value),
19699 NewVT));
19700 SDValue NewST =
19701 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19702 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19703
19704 AddToWorklist(NewPtr.getNode());
19705 AddToWorklist(NewLD.getNode());
19706 AddToWorklist(NewVal.getNode());
19707 WorklistRemover DeadNodes(*this);
19708 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19709 ++OpsNarrowed;
19710 return NewST;
19711 }
19712 }
19713
19714 return SDValue();
19715}
19716
19717/// For a given floating point load / store pair, if the load value isn't used
19718/// by any other operations, then consider transforming the pair to integer
19719/// load / store operations if the target deems the transformation profitable.
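/// As a hedged sketch of the intent: a pure f64 copy such as
///   (store (f64 (load p)), q)
/// may be rewritten as
///   (store (i64 (load p)), q)
/// when both integer operations are legal and fast for the target.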
19720SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19721 StoreSDNode *ST = cast<StoreSDNode>(N);
19722 SDValue Value = ST->getValue();
19723 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19724 Value.hasOneUse()) {
19725 LoadSDNode *LD = cast<LoadSDNode>(Value);
19726 EVT VT = LD->getMemoryVT();
19727 if (!VT.isFloatingPoint() ||
19728 VT != ST->getMemoryVT() ||
19729 LD->isNonTemporal() ||
19730 ST->isNonTemporal() ||
19731 LD->getPointerInfo().getAddrSpace() != 0 ||
19732 ST->getPointerInfo().getAddrSpace() != 0)
19733 return SDValue();
19734
19735 TypeSize VTSize = VT.getSizeInBits();
19736
19737 // We don't know the size of scalable types at compile time so we cannot
19738 // create an integer of the equivalent size.
19739 if (VTSize.isScalable())
19740 return SDValue();
19741
19742 unsigned FastLD = 0, FastST = 0;
19743 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19744 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19745 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19746 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19747 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19748 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19749 *LD->getMemOperand(), &FastLD) ||
19750 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19751 *ST->getMemOperand(), &FastST) ||
19752 !FastLD || !FastST)
19753 return SDValue();
19754
19755 SDValue NewLD =
19756 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19757 LD->getPointerInfo(), LD->getAlign());
19758
19759 SDValue NewST =
19760 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19761 ST->getPointerInfo(), ST->getAlign());
19762
19763 AddToWorklist(NewLD.getNode());
19764 AddToWorklist(NewST.getNode());
19765 WorklistRemover DeadNodes(*this);
19766 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19767 ++LdStFP2Int;
19768 return NewST;
19769 }
19770
19771 return SDValue();
19772}
19773
19774// This is a helper function for visitMUL to check the profitability
19775// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19776// MulNode is the original multiply, AddNode is (add x, c1),
19777// and ConstNode is c2.
19778//
19779// If the (add x, c1) has multiple uses, we could increase
19780// the number of adds if we make this transformation.
19781// It would only be worth doing this if we can remove a
19782// multiply in the process. Check for that here.
19783// To illustrate:
19784// (A + c1) * c3
19785// (A + c2) * c3
19786// We're checking for cases where we have common "c3 * A" expressions.
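// As a concrete (illustrative) instance with c1 = 1, c2 = 2, c3 = 5:
//   (A + 1) * 5 -> (A * 5) + 5
//   (A + 2) * 5 -> (A * 5) + 10
// so the common (A * 5) is computed only once.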
19787bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19788 SDValue ConstNode) {
19789 APInt Val;
19790
19791 // If the add only has one use, and the target thinks the folding is
19792 // profitable or does not lead to worse code, this would be OK to do.
19793 if (AddNode->hasOneUse() &&
19794 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19795 return true;
19796
19797 // Walk all the users of the constant with which we're multiplying.
19798 for (SDNode *Use : ConstNode->uses()) {
19799 if (Use == MulNode) // This use is the one we're on right now. Skip it.
19800 continue;
19801
19802 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19803 SDNode *OtherOp;
19804 SDNode *MulVar = AddNode.getOperand(0).getNode();
19805
19806 // OtherOp is what we're multiplying against the constant.
19807 if (Use->getOperand(0) == ConstNode)
19808 OtherOp = Use->getOperand(1).getNode();
19809 else
19810 OtherOp = Use->getOperand(0).getNode();
19811
19812 // Check to see if multiply is with the same operand of our "add".
19813 //
19814 // ConstNode = CONST
19815 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
19816 // ...
19817 // AddNode = (A + c1) <-- MulVar is A.
19818 // = AddNode * ConstNode <-- current visiting instruction.
19819 //
19820 // If we make this transformation, we will have a common
19821 // multiply (ConstNode * A) that we can save.
19822 if (OtherOp == MulVar)
19823 return true;
19824
19825 // Now check to see if a future expansion will give us a common
19826 // multiply.
19827 //
19828 // ConstNode = CONST
19829 // AddNode = (A + c1)
19830 // ... = AddNode * ConstNode <-- current visiting instruction.
19831 // ...
19832 // OtherOp = (A + c2)
19833 // Use = OtherOp * ConstNode <-- visiting Use.
19834 //
19835 // If we make this transformation, we will have a common
19836 // multiply (CONST * A) after we also do the same transformation
19837 // to the "Use" instruction.
19838 if (OtherOp->getOpcode() == ISD::ADD &&
19839 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19840 OtherOp->getOperand(0).getNode() == MulVar)
19841 return true;
19842 }
19843 }
19844
19845 // Didn't find a case where this would be profitable.
19846 return false;
19847}
19848
19849SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19850 unsigned NumStores) {
19851 SmallVector<SDValue, 8> Chains;
19852 SmallPtrSet<const SDNode *, 8> Visited;
19853 SDLoc StoreDL(StoreNodes[0].MemNode);
19854
19855 for (unsigned i = 0; i < NumStores; ++i) {
19856 Visited.insert(StoreNodes[i].MemNode);
19857 }
19858
19859 // Don't include nodes that are children or repeated nodes.
19860 for (unsigned i = 0; i < NumStores; ++i) {
19861 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19862 Chains.push_back(StoreNodes[i].MemNode->getChain());
19863 }
19864
19865 assert(!Chains.empty() && "Chain should have generated a chain");
19866 return DAG.getTokenFactor(StoreDL, Chains);
19867}
19868
19869bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19870 const Value *UnderlyingObj = nullptr;
19871 for (const auto &MemOp : StoreNodes) {
19872 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
19873 // A pseudo value such as a stack frame has its own frame index and size;
19874 // we should not use the first store's frame index for other frames.
19875 if (MMO->getPseudoValue())
19876 return false;
19877
19878 if (!MMO->getValue())
19879 return false;
19880
19881 const Value *Obj = getUnderlyingObject(MMO->getValue());
19882
19883 if (UnderlyingObj && UnderlyingObj != Obj)
19884 return false;
19885
19886 if (!UnderlyingObj)
19887 UnderlyingObj = Obj;
19888 }
19889
19890 return true;
19891}
19892
19893bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19894 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19895 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19896 // Make sure we have something to merge.
19897 if (NumStores < 2)
19898 return false;
19899
19900 assert((!UseTrunc || !UseVector) &&
19901 "This optimization cannot emit a vector truncating store");
19902
19903 // The latest Node in the DAG.
19904 SDLoc DL(StoreNodes[0].MemNode);
19905
19906 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19907 unsigned SizeInBits = NumStores * ElementSizeBits;
19908 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19909
19910 std::optional<MachineMemOperand::Flags> Flags;
19911 AAMDNodes AAInfo;
19912 for (unsigned I = 0; I != NumStores; ++I) {
19913 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19914 if (!Flags) {
19915 Flags = St->getMemOperand()->getFlags();
19916 AAInfo = St->getAAInfo();
19917 continue;
19918 }
19919 // Skip merging if there's an inconsistent flag.
19920 if (Flags != St->getMemOperand()->getFlags())
19921 return false;
19922 // Concatenate AA metadata.
19923 AAInfo = AAInfo.concat(St->getAAInfo());
19924 }
19925
19926 EVT StoreTy;
19927 if (UseVector) {
19928 unsigned Elts = NumStores * NumMemElts;
19929 // Get the type for the merged vector store.
19930 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19931 } else
19932 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19933
19934 SDValue StoredVal;
19935 if (UseVector) {
19936 if (IsConstantSrc) {
19937 SmallVector<SDValue, 8> BuildVector;
19938 for (unsigned I = 0; I != NumStores; ++I) {
19939 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19940 SDValue Val = St->getValue();
19941 // If the constant is of the wrong type, convert it now. This comes up
19942 // when one of our stores was truncating.
19943 if (MemVT != Val.getValueType()) {
19944 Val = peekThroughBitcasts(Val);
19945 // Deal with constants of wrong size.
19946 if (ElementSizeBits != Val.getValueSizeInBits()) {
19947 auto *C = dyn_cast<ConstantSDNode>(Val);
19948 if (!C)
19949 // Not clear how to truncate FP values.
19950 // TODO: Handle truncation of build_vector constants
19951 return false;
19952
19953 EVT IntMemVT =
19954 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19955 Val = DAG.getConstant(C->getAPIntValue()
19956 .zextOrTrunc(Val.getValueSizeInBits())
19957 .zextOrTrunc(ElementSizeBits),
19958 SDLoc(C), IntMemVT);
19959 }
19960 // Make sure the correctly sized value is bitcast to the correct type.
19961 Val = DAG.getBitcast(MemVT, Val);
19962 }
19963 BuildVector.push_back(Val);
19964 }
19965 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19966 : ISD::BUILD_VECTOR,
19967 DL, StoreTy, BuildVector);
19968 } else {
19969 SmallVector<SDValue, 8> Ops;
19970 for (unsigned i = 0; i < NumStores; ++i) {
19971 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
19972 SDValue Val = peekThroughBitcasts(St->getValue());
19973 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
19974 // type MemVT. If the underlying value is not the correct
19975 // type, but it is an extraction of an appropriate vector we
19976 // can recast Val to be of the correct type. This may require
19977 // converting between EXTRACT_VECTOR_ELT and
19978 // EXTRACT_SUBVECTOR.
19979 if ((MemVT != Val.getValueType()) &&
19980 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
19981 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
19982 EVT MemVTScalarTy = MemVT.getScalarType();
19983 // We may need to add a bitcast here to get types to line up.
19984 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
19985 Val = DAG.getBitcast(MemVT, Val);
19986 } else if (MemVT.isVector() &&
19987 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19988 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
19989 } else {
19990 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
19991 : ISD::EXTRACT_VECTOR_ELT;
19992 SDValue Vec = Val.getOperand(0);
19993 SDValue Idx = Val.getOperand(1);
19994 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
19995 }
19996 }
19997 Ops.push_back(Val);
19998 }
19999
20000 // Build the extracted vector elements back into a vector.
20001 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20002 : ISD::BUILD_VECTOR,
20003 DL, StoreTy, Ops);
20004 }
20005 } else {
20006 // We should always use a vector store when merging extracted vector
20007 // elements, so this path implies a store of constants.
20008 assert(IsConstantSrc && "Merged vector elements should use vector store");
20009
20010 APInt StoreInt(SizeInBits, 0);
20011
20012 // Construct a single integer constant which is made of the smaller
20013 // constant inputs.
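// Illustrative example (editor's sketch): four i8 stores of 0x11, 0x22,
// 0x33, 0x44 at increasing addresses pack into the single i32 constant
// 0x44332211 on a little-endian target (and 0x11223344 on big-endian).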
20014 bool IsLE = DAG.getDataLayout().isLittleEndian();
20015 for (unsigned i = 0; i < NumStores; ++i) {
20016 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20017 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20018
20019 SDValue Val = St->getValue();
20020 Val = peekThroughBitcasts(Val);
20021 StoreInt <<= ElementSizeBits;
20022 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20023 StoreInt |= C->getAPIntValue()
20024 .zextOrTrunc(ElementSizeBits)
20025 .zextOrTrunc(SizeInBits);
20026 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20027 StoreInt |= C->getValueAPF()
20028 .bitcastToAPInt()
20029 .zextOrTrunc(ElementSizeBits)
20030 .zextOrTrunc(SizeInBits);
20031 // If fp truncation is necessary give up for now.
20032 if (MemVT.getSizeInBits() != ElementSizeBits)
20033 return false;
20034 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20035 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20036 // Not yet handled
20037 return false;
20038 } else {
20039 llvm_unreachable("Invalid constant element type");
20040 }
20041 }
20042
20043 // Create the new Load and Store operations.
20044 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20045 }
20046
20047 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20048 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20049 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20050
20051 // Make sure we use a trunc store if it's necessary to be legal.
20052 // When generating the new widened store, if the first store's pointer info
20053 // cannot be reused, discard the pointer info except for the address space,
20054 // because the widened store can no longer be represented by the original
20055 // pointer info, which describes the narrower memory object.
20056 SDValue NewStore;
20057 if (!UseTrunc) {
20058 NewStore = DAG.getStore(
20059 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20060 CanReusePtrInfo
20061 ? FirstInChain->getPointerInfo()
20062 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20063 FirstInChain->getAlign(), *Flags, AAInfo);
20064 } else { // Must be realized as a trunc store
20065 EVT LegalizedStoredValTy =
20066 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20067 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20068 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20069 SDValue ExtendedStoreVal =
20070 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20071 LegalizedStoredValTy);
20072 NewStore = DAG.getTruncStore(
20073 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20074 CanReusePtrInfo
20075 ? FirstInChain->getPointerInfo()
20076 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20077 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20078 AAInfo);
20079 }
20080
20081 // Replace all merged stores with the new store.
20082 for (unsigned i = 0; i < NumStores; ++i)
20083 CombineTo(StoreNodes[i].MemNode, NewStore);
20084
20085 AddToWorklist(NewChain.getNode());
20086 return true;
20087}
20088
20089void DAGCombiner::getStoreMergeCandidates(
20090 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20091 SDNode *&RootNode) {
20092 // This holds the base pointer, index, and the offset in bytes from the base
20093 // pointer. We must have a base and an offset. Do not handle stores to undef
20094 // base pointers.
20095 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20096 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20097 return;
20098
20099 SDValue Val = peekThroughBitcasts(St->getValue());
20100 StoreSource StoreSrc = getStoreSource(Val);
20101 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20102
20103 // Match on loadbaseptr if relevant.
20104 EVT MemVT = St->getMemoryVT();
20105 BaseIndexOffset LBasePtr;
20106 EVT LoadVT;
20107 if (StoreSrc == StoreSource::Load) {
20108 auto *Ld = cast<LoadSDNode>(Val);
20109 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20110 LoadVT = Ld->getMemoryVT();
20111 // Load and store should be the same type.
20112 if (MemVT != LoadVT)
20113 return;
20114 // Loads must only have one use.
20115 if (!Ld->hasNUsesOfValue(1, 0))
20116 return;
20117 // The memory operands must not be volatile/indexed/atomic.
20118 // TODO: May be able to relax for unordered atomics (see D66309)
20119 if (!Ld->isSimple() || Ld->isIndexed())
20120 return;
20121 }
20122 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20123 int64_t &Offset) -> bool {
20124 // The memory operands must not be volatile/indexed/atomic.
20125 // TODO: May be able to relax for unordered atomics (see D66309)
20126 if (!Other->isSimple() || Other->isIndexed())
20127 return false;
20128 // Don't mix temporal stores with non-temporal stores.
20129 if (St->isNonTemporal() != Other->isNonTemporal())
20130 return false;
20131 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20132 return false;
20133 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20134 // Allow merging constants of different types as integers.
20135 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20136 : Other->getMemoryVT() != MemVT;
20137 switch (StoreSrc) {
20138 case StoreSource::Load: {
20139 if (NoTypeMatch)
20140 return false;
20141 // The Load's Base Ptr must also match.
20142 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20143 if (!OtherLd)
20144 return false;
20145 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20146 if (LoadVT != OtherLd->getMemoryVT())
20147 return false;
20148 // Loads must only have one use.
20149 if (!OtherLd->hasNUsesOfValue(1, 0))
20150 return false;
20151 // The memory operands must not be volatile/indexed/atomic.
20152 // TODO: May be able to relax for unordered atomics (see D66309)
20153 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20154 return false;
20155 // Don't mix temporal loads with non-temporal loads.
20156 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20157 return false;
20158 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20159 *OtherLd))
20160 return false;
20161 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20162 return false;
20163 break;
20164 }
20165 case StoreSource::Constant:
20166 if (NoTypeMatch)
20167 return false;
20168 if (getStoreSource(OtherBC) != StoreSource::Constant)
20169 return false;
20170 break;
20171 case StoreSource::Extract:
20172 // Do not merge truncated stores here.
20173 if (Other->isTruncatingStore())
20174 return false;
20175 if (!MemVT.bitsEq(OtherBC.getValueType()))
20176 return false;
20177 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20178 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20179 return false;
20180 break;
20181 default:
20182 llvm_unreachable("Unhandled store source for merging");
20183 }
20184 Ptr = BaseIndexOffset::match(Other, DAG);
20185 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20186 };
20187
20188 // Check if the pair of StoreNode and the RootNode already bail out many
20189 // times which is over the limit in dependence check.
20190 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20191 SDNode *RootNode) -> bool {
20192 auto RootCount = StoreRootCountMap.find(StoreNode);
20193 return RootCount != StoreRootCountMap.end() &&
20194 RootCount->second.first == RootNode &&
20195 RootCount->second.second > StoreMergeDependenceLimit;
20196 };
20197
20198 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20199 // This must be a chain use.
20200 if (UseIter.getOperandNo() != 0)
20201 return;
20202 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20203 BaseIndexOffset Ptr;
20204 int64_t PtrDiff;
20205 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20206 !OverLimitInDependenceCheck(OtherStore, RootNode))
20207 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20208 }
20209 };
20210
20211 // We are looking for a root node which is an ancestor to all mergeable
20212 // stores. We search up through a load, to our root and then down
20213 // through all children. For instance we will find Store{1,2,3} if
20214 // St is Store1, Store2, or Store3 where the root is not a load,
20215 // which is always true for non-volatile ops. TODO: Expand
20216 // the search to find all valid candidates through multiple layers of loads.
20217 //
20218 // Root
20219 // |-------|-------|
20220 // Load Load Store3
20221 // | |
20222 // Store1 Store2
20223 //
20224 // FIXME: We should be able to climb and
20225 // descend TokenFactors to find candidates as well.
20226
20227 RootNode = St->getChain().getNode();
20228
20229 unsigned NumNodesExplored = 0;
20230 const unsigned MaxSearchNodes = 1024;
20231 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20232 RootNode = Ldn->getChain().getNode();
20233 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20234 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20235 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20236 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20237 TryToAddCandidate(I2);
20238 }
20239 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20240 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20241 TryToAddCandidate(I);
20242 }
20243 }
20244 } else {
20245 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20246 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20247 TryToAddCandidate(I);
20248 }
20249}
20250
20251// We need to check that merging these stores does not cause a loop in the
20252// DAG. Any store candidate may depend on another candidate indirectly through
20253// its operands. Check in parallel by searching up from operands of candidates.
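// For example (illustrative), if one candidate's stored value is computed from
// a load whose chain passes through another candidate, merging the two into a
// single node would make that node its own predecessor.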
20254bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20255 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20256 SDNode *RootNode) {
20257 // FIXME: We should be able to truncate a full search of
20258 // predecessors by doing a BFS and keeping tabs on the originating
20259 // stores from which worklist nodes come, in a similar way to
20260 // TokenFactor simplification.
20261
20262 SmallPtrSet<const SDNode *, 32> Visited;
20263 SmallVector<const SDNode *, 8> Worklist;
20264
20265 // RootNode is a predecessor to all candidates so we need not search
20266 // past it. Add RootNode (peeking through TokenFactors). Do not count
20267 // these towards size check.
20268
20269 Worklist.push_back(RootNode);
20270 while (!Worklist.empty()) {
20271 auto N = Worklist.pop_back_val();
20272 if (!Visited.insert(N).second)
20273 continue; // Already present in Visited.
20274 if (N->getOpcode() == ISD::TokenFactor) {
20275 for (SDValue Op : N->ops())
20276 Worklist.push_back(Op.getNode());
20277 }
20278 }
20279
20280 // Don't count pruning nodes towards max.
20281 unsigned int Max = 1024 + Visited.size();
20282 // Search Ops of store candidates.
20283 for (unsigned i = 0; i < NumStores; ++i) {
20284 SDNode *N = StoreNodes[i].MemNode;
20285 // Of the 4 Store Operands:
20286 // * Chain (Op 0) -> We have already considered these
20287 // in candidate selection, but only by following the
20288 // chain dependencies. We could still have a chain
20289 // dependency to a load, that has a non-chain dep to
20290 // another load, that depends on a store, etc. So it is
20291 // possible to have dependencies that consist of a mix
20292 // of chain and non-chain deps, and we need to include
20293 // chain operands in the analysis here.
20294 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20295 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20296 // but aren't necessarily from the same base node, so
20297 // cycles are possible (e.g. via indexed store).
20298 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20299 // non-indexed stores). Not constant on all targets (e.g. ARM)
20300 // and so can participate in a cycle.
20301 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20302 Worklist.push_back(N->getOperand(j).getNode());
20303 }
20304 // Search through DAG. We can stop early if we find a store node.
20305 for (unsigned i = 0; i < NumStores; ++i)
20306 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20307 Max)) {
20308 // If the search bails out, record the StoreNode and RootNode in the
20309 // StoreRootCountMap. If we have seen the pair many times over a limit,
20310 // we won't add the StoreNode to the StoreNodes set again.
20311 if (Visited.size() >= Max) {
20312 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20313 if (RootCount.first == RootNode)
20314 RootCount.second++;
20315 else
20316 RootCount = {RootNode, 1};
20317 }
20318 return false;
20319 }
20320 return true;
20321}
20322
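// Example (editor's sketch): with 4-byte elements and candidate offsets
// {0, 4, 8, 20}, this returns 3 for the run at offsets 0/4/8; a later call on
// the remaining single store returns 0 because nothing can be merged.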
20323unsigned
20324DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20325 int64_t ElementSizeBytes) const {
20326 while (true) {
20327 // Find a store past the width of the first store.
20328 size_t StartIdx = 0;
20329 while ((StartIdx + 1 < StoreNodes.size()) &&
20330 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20331 StoreNodes[StartIdx + 1].OffsetFromBase)
20332 ++StartIdx;
20333
20334 // Bail if we don't have enough candidates to merge.
20335 if (StartIdx + 1 >= StoreNodes.size())
20336 return 0;
20337
20338 // Trim stores that overlapped with the first store.
20339 if (StartIdx)
20340 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20341
20342 // Scan the memory operations on the chain and find the first
20343 // non-consecutive store memory address.
20344 unsigned NumConsecutiveStores = 1;
20345 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20346 // Check that the addresses are consecutive starting from the second
20347 // element in the list of stores.
20348 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20349 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20350 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20351 break;
20352 NumConsecutiveStores = i + 1;
20353 }
20354 if (NumConsecutiveStores > 1)
20355 return NumConsecutiveStores;
20356
20357 // There are no consecutive stores at the start of the list.
20358 // Remove the first store and try again.
20359 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20360 }
20361}
20362
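// Illustrative intent (editor's note): e.g. four consecutive i8 stores of the
// constants 1, 2, 3 and 4 may become one i32 store (or a v4i8 store when
// vector stores are allowed and cheap), subject to the legality, alignment and
// fast-access checks performed below.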
20363bool DAGCombiner::tryStoreMergeOfConstants(
20364 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20365 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20366 LLVMContext &Context = *DAG.getContext();
20367 const DataLayout &DL = DAG.getDataLayout();
20368 int64_t ElementSizeBytes = MemVT.getStoreSize();
20369 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20370 bool MadeChange = false;
20371
20372 // Store the constants into memory as one consecutive store.
20373 while (NumConsecutiveStores >= 2) {
20374 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20375 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20376 Align FirstStoreAlign = FirstInChain->getAlign();
20377 unsigned LastLegalType = 1;
20378 unsigned LastLegalVectorType = 1;
20379 bool LastIntegerTrunc = false;
20380 bool NonZero = false;
20381 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20382 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20383 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20384 SDValue StoredVal = ST->getValue();
20385 bool IsElementZero = false;
20386 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20387 IsElementZero = C->isZero();
20388 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20389 IsElementZero = C->getConstantFPValue()->isNullValue();
20390 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20391 IsElementZero = true;
20392 if (IsElementZero) {
20393 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20394 FirstZeroAfterNonZero = i;
20395 }
20396 NonZero |= !IsElementZero;
20397
20398 // Find a legal type for the constant store.
20399 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20400 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20401 unsigned IsFast = 0;
20402
20403 // Break early when size is too large to be legal.
20404 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20405 break;
20406
20407 if (TLI.isTypeLegal(StoreTy) &&
20408 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20409 DAG.getMachineFunction()) &&
20410 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20411 *FirstInChain->getMemOperand(), &IsFast) &&
20412 IsFast) {
20413 LastIntegerTrunc = false;
20414 LastLegalType = i + 1;
20415 // Or check whether a truncstore is legal.
20416 } else if (TLI.getTypeAction(Context, StoreTy) ==
20417 TargetLowering::TypePromoteInteger) {
20418 EVT LegalizedStoredValTy =
20419 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20420 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20421 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20422 DAG.getMachineFunction()) &&
20423 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20424 *FirstInChain->getMemOperand(), &IsFast) &&
20425 IsFast) {
20426 LastIntegerTrunc = true;
20427 LastLegalType = i + 1;
20428 }
20429 }
20430
20431 // We only use vectors if the target allows it and the function is not
20432 // marked with the noimplicitfloat attribute.
20433 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20434 AllowVectors) {
20435 // Find a legal type for the vector store.
20436 unsigned Elts = (i + 1) * NumMemElts;
20437 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20438 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20439 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20440 TLI.allowsMemoryAccess(Context, DL, Ty,
20441 *FirstInChain->getMemOperand(), &IsFast) &&
20442 IsFast)
20443 LastLegalVectorType = i + 1;
20444 }
20445 }
20446
20447 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20448 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20449 bool UseTrunc = LastIntegerTrunc && !UseVector;
20450
20451 // Check if we found a legal integer type that creates a meaningful
20452 // merge.
20453 if (NumElem < 2) {
20454 // We know that candidate stores are in order and of correct
20455 // shape. While there is no mergeable sequence from the
20456 // beginning one may start later in the sequence. The only
20457 // reason a merge of size N could have failed where another of
20458 // the same size would not have, is if the alignment has
20459 // improved or we've dropped a non-zero value. Drop as many
20460 // candidates as we can here.
20461 unsigned NumSkip = 1;
20462 while ((NumSkip < NumConsecutiveStores) &&
20463 (NumSkip < FirstZeroAfterNonZero) &&
20464 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20465 NumSkip++;
20466
20467 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20468 NumConsecutiveStores -= NumSkip;
20469 continue;
20470 }
20471
20472 // Check that we can merge these candidates without causing a cycle.
20473 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20474 RootNode)) {
20475 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20476 NumConsecutiveStores -= NumElem;
20477 continue;
20478 }
20479
20480 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20481 /*IsConstantSrc*/ true,
20482 UseVector, UseTrunc);
20483
20484 // Remove merged stores for next iteration.
20485 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20486 NumConsecutiveStores -= NumElem;
20487 }
20488 return MadeChange;
20489}
20490
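// Illustrative intent (editor's note): consecutive stores of values extracted
// from vectors, e.g. (extract_vector_elt v, 0) and (extract_vector_elt v, 1)
// stored to adjacent addresses, may be rebuilt into one wider vector store,
// subject to the checks below.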
20491bool DAGCombiner::tryStoreMergeOfExtracts(
20492 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20493 EVT MemVT, SDNode *RootNode) {
20494 LLVMContext &Context = *DAG.getContext();
20495 const DataLayout &DL = DAG.getDataLayout();
20496 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20497 bool MadeChange = false;
20498
20499 // Loop on Consecutive Stores on success.
20500 while (NumConsecutiveStores >= 2) {
20501 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20502 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20503 Align FirstStoreAlign = FirstInChain->getAlign();
20504 unsigned NumStoresToMerge = 1;
20505 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20506 // Find a legal type for the vector store.
20507 unsigned Elts = (i + 1) * NumMemElts;
20508 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20509 unsigned IsFast = 0;
20510
20511 // Break early when size is too large to be legal.
20512 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20513 break;
20514
20515 if (TLI.isTypeLegal(Ty) &&
20516 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20517 TLI.allowsMemoryAccess(Context, DL, Ty,
20518 *FirstInChain->getMemOperand(), &IsFast) &&
20519 IsFast)
20520 NumStoresToMerge = i + 1;
20521 }
20522
20523 // Check if we found a legal integer type creating a meaningful
20524 // merge.
20525 if (NumStoresToMerge < 2) {
20526 // We know that candidate stores are in order and of correct
20527 // shape. While there is no mergeable sequence from the
20528 // beginning one may start later in the sequence. The only
20529 // reason a merge of size N could have failed where another of
20530 // the same size would not have, is if the alignment has
20531 // improved. Drop as many candidates as we can here.
20532 unsigned NumSkip = 1;
20533 while ((NumSkip < NumConsecutiveStores) &&
20534 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20535 NumSkip++;
20536
20537 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20538 NumConsecutiveStores -= NumSkip;
20539 continue;
20540 }
20541
20542 // Check that we can merge these candidates without causing a cycle.
20543 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20544 RootNode)) {
20545 StoreNodes.erase(StoreNodes.begin(),
20546 StoreNodes.begin() + NumStoresToMerge);
20547 NumConsecutiveStores -= NumStoresToMerge;
20548 continue;
20549 }
20550
20551 MadeChange |= mergeStoresOfConstantsOrVecElts(
20552 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20553 /*UseVector*/ true, /*UseTrunc*/ false);
20554
20555 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20556 NumConsecutiveStores -= NumStoresToMerge;
20557 }
20558 return MadeChange;
20559}
20560
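// Illustrative intent (editor's note): a pair such as
//   (store (i32 (load p)),   q)
//   (store (i32 (load p+4)), q+4)
// may become a single i64 (or v2i32) load from p followed by a single store
// to q, when the wider operations are legal and fast for the target.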
20561bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20562 unsigned NumConsecutiveStores, EVT MemVT,
20563 SDNode *RootNode, bool AllowVectors,
20564 bool IsNonTemporalStore,
20565 bool IsNonTemporalLoad) {
20566 LLVMContext &Context = *DAG.getContext();
20567 const DataLayout &DL = DAG.getDataLayout();
20568 int64_t ElementSizeBytes = MemVT.getStoreSize();
20569 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20570 bool MadeChange = false;
20571
20572 // Look for load nodes which are used by the stored values.
20573 SmallVector<MemOpLink, 8> LoadNodes;
20574
20575 // Find acceptable loads. Loads need to have the same chain (token factor),
20576 // must not be zext, volatile, indexed, and they must be consecutive.
20577 BaseIndexOffset LdBasePtr;
20578
20579 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20580 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20581 SDValue Val = peekThroughBitcasts(St->getValue());
20582 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20583
20584 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20585 // If this is not the first ptr that we check.
20586 int64_t LdOffset = 0;
20587 if (LdBasePtr.getBase().getNode()) {
20588 // The base ptr must be the same.
20589 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20590 break;
20591 } else {
20592 // Check that all other base pointers are the same as this one.
20593 LdBasePtr = LdPtr;
20594 }
20595
20596 // We found a potential memory operand to merge.
20597 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20598 }
20599
20600 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20601 Align RequiredAlignment;
20602 bool NeedRotate = false;
20603 if (LoadNodes.size() == 2) {
20604 // If we have load/store pair instructions and we only have two values,
20605 // don't bother merging.
20606 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20607 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20608 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20609 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20610 break;
20611 }
20612 // If the loads are reversed, see if we can rotate the halves into place.
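// E.g. (illustrative): if the store at the lower address uses the load
// from the higher address (and vice versa), a single wide load rotated
// by half its width can feed a single wide store.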
20613 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20614 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20615 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20616 if (Offset0 - Offset1 == ElementSizeBytes &&
20617 (hasOperation(ISD::ROTL, PairVT) ||
20618 hasOperation(ISD::ROTR, PairVT))) {
20619 std::swap(LoadNodes[0], LoadNodes[1]);
20620 NeedRotate = true;
20621 }
20622 }
20623 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20624 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20625 Align FirstStoreAlign = FirstInChain->getAlign();
20626 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20627
20628 // Scan the memory operations on the chain and find the first
20629 // non-consecutive load memory address. These variables hold the index in
20630 // the store node array.
20631
20632 unsigned LastConsecutiveLoad = 1;
20633
20634 // This variable refers to the size and not index in the array.
20635 unsigned LastLegalVectorType = 1;
20636 unsigned LastLegalIntegerType = 1;
20637 bool isDereferenceable = true;
20638 bool DoIntegerTruncate = false;
20639 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20640 SDValue LoadChain = FirstLoad->getChain();
20641 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20642 // All loads must share the same chain.
20643 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20644 break;
20645
20646 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20647 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20648 break;
20649 LastConsecutiveLoad = i;
20650
20651 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20652 isDereferenceable = false;
20653
20654 // Find a legal type for the vector store.
20655 unsigned Elts = (i + 1) * NumMemElts;
20656 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20657
20658 // Break early when size is too large to be legal.
20659 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20660 break;
20661
20662 unsigned IsFastSt = 0;
20663 unsigned IsFastLd = 0;
20664 // Don't try vector types if we need a rotate. We may still fail the
20665 // legality checks for the integer type, but we can't handle the rotate
20666 // case with vectors.
20667 // FIXME: We could use a shuffle in place of the rotate.
20668 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20669 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20670 DAG.getMachineFunction()) &&
20671 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20672 *FirstInChain->getMemOperand(), &IsFastSt) &&
20673 IsFastSt &&
20674 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20675 *FirstLoad->getMemOperand(), &IsFastLd) &&
20676 IsFastLd) {
20677 LastLegalVectorType = i + 1;
20678 }
20679
20680 // Find a legal type for the integer store.
20681 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20682 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20683 if (TLI.isTypeLegal(StoreTy) &&
20684 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20685 DAG.getMachineFunction()) &&
20686 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20687 *FirstInChain->getMemOperand(), &IsFastSt) &&
20688 IsFastSt &&
20689 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20690 *FirstLoad->getMemOperand(), &IsFastLd) &&
20691 IsFastLd) {
20692 LastLegalIntegerType = i + 1;
20693 DoIntegerTruncate = false;
20694 // Or check whether a truncstore and extload is legal.
20695 } else if (TLI.getTypeAction(Context, StoreTy) ==
20696 TargetLowering::TypePromoteInteger) {
20697 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20698 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20699 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20700 DAG.getMachineFunction()) &&
20701 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20702 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20703 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20704 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20705 *FirstInChain->getMemOperand(), &IsFastSt) &&
20706 IsFastSt &&
20707 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20708 *FirstLoad->getMemOperand(), &IsFastLd) &&
20709 IsFastLd) {
20710 LastLegalIntegerType = i + 1;
20711 DoIntegerTruncate = true;
20712 }
20713 }
20714 }
20715
20716 // Only use vector types if the vector type is larger than the integer
20717 // type. If they are the same, use integers.
20718 bool UseVectorTy =
20719 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20720 unsigned LastLegalType =
20721 std::max(LastLegalVectorType, LastLegalIntegerType);
20722
20723 // We add +1 here because the LastXXX variables refer to location while
20724 // the NumElem refers to array/index size.
20725 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20726 NumElem = std::min(LastLegalType, NumElem);
20727 Align FirstLoadAlign = FirstLoad->getAlign();
20728
20729 if (NumElem < 2) {
20730 // We know that candidate stores are in order and of correct
20731 // shape. While there is no mergeable sequence from the
20732 // beginning one may start later in the sequence. The only
20733 // reason a merge of size N could have failed where another of
20734 // the same size would not have is if the alignment or either
20735 // the load or store has improved. Drop as many candidates as we
20736 // can here.
20737 unsigned NumSkip = 1;
20738 while ((NumSkip < LoadNodes.size()) &&
20739 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20740 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20741 NumSkip++;
20742 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20743 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20744 NumConsecutiveStores -= NumSkip;
20745 continue;
20746 }
20747
20748 // Check that we can merge these candidates without causing a cycle.
20749 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20750 RootNode)) {
20751 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20752 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20753 NumConsecutiveStores -= NumElem;
20754 continue;
20755 }
20756
20757 // Find if it is better to use vectors or integers to load and store
20758 // to memory.
20759 EVT JointMemOpVT;
20760 if (UseVectorTy) {
20761 // Find a legal type for the vector store.
20762 unsigned Elts = NumElem * NumMemElts;
20763 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20764 } else {
20765 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20766 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20767 }
20768
20769 SDLoc LoadDL(LoadNodes[0].MemNode);
20770 SDLoc StoreDL(StoreNodes[0].MemNode);
20771
20772 // The merged loads are required to have the same incoming chain, so
20773 // using the first's chain is acceptable.
20774
20775 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20776 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20777 AddToWorklist(NewStoreChain.getNode());
20778
20779 MachineMemOperand::Flags LdMMOFlags =
20780 isDereferenceable ? MachineMemOperand::MODereferenceable
20781 : MachineMemOperand::MONone;
20782 if (IsNonTemporalLoad)
20783 LdMMOFlags |= MachineMemOperand::MONonTemporal;
20784
20785 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20786
20787 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20788 ? MachineMemOperand::MONonTemporal
20789 : MachineMemOperand::MONone;
20790
20791 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20792
20793 SDValue NewLoad, NewStore;
20794 if (UseVectorTy || !DoIntegerTruncate) {
20795 NewLoad = DAG.getLoad(
20796 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20797 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20798 SDValue StoreOp = NewLoad;
20799 if (NeedRotate) {
20800 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20801 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20802 "Unexpected type for rotate-able load pair");
20803 SDValue RotAmt =
20804 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20805 // Target can convert to the identical ROTR if it does not have ROTL.
20806 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20807 }
20808 NewStore = DAG.getStore(
20809 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20810 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20811 : MachinePointerInfo(FirstStoreAS),
20812 FirstStoreAlign, StMMOFlags);
20813 } else { // This must be the truncstore/extload case
20814 EVT ExtendedTy =
20815 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20816 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20817 FirstLoad->getChain(), FirstLoad->getBasePtr(),
20818 FirstLoad->getPointerInfo(), JointMemOpVT,
20819 FirstLoadAlign, LdMMOFlags);
20820 NewStore = DAG.getTruncStore(
20821 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20822 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20823 : MachinePointerInfo(FirstStoreAS),
20824 JointMemOpVT, FirstInChain->getAlign(),
20825 FirstInChain->getMemOperand()->getFlags());
20826 }
20827
20828 // Transfer chain users from old loads to the new load.
20829 for (unsigned i = 0; i < NumElem; ++i) {
20830 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20831 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20832 SDValue(NewLoad.getNode(), 1));
20833 }
20834
20835 // Replace all stores with the new store. Recursively remove corresponding
20836 // values if they are no longer used.
20837 for (unsigned i = 0; i < NumElem; ++i) {
20838 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20839 CombineTo(StoreNodes[i].MemNode, NewStore);
20840 if (Val->use_empty())
20841 recursivelyDeleteUnusedNodes(Val.getNode());
20842 }
20843
20844 MadeChange = true;
20845 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20846 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20847 NumConsecutiveStores -= NumElem;
20848 }
20849 return MadeChange;
20850}
20851
20852bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20853 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20854 return false;
20855
20856 // TODO: Extend this function to merge stores of scalable vectors.
20857 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20858 // store since we know <vscale x 16 x i8> is exactly twice as large as
20859 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20860 EVT MemVT = St->getMemoryVT();
20861 if (MemVT.isScalableVT())
20862 return false;
20863 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20864 return false;
20865
20866 // This function cannot currently deal with non-byte-sized memory sizes.
20867 int64_t ElementSizeBytes = MemVT.getStoreSize();
20868 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20869 return false;
20870
20871 // Do not bother looking at stored values that are not constants, loads, or
20872 // extracted vector elements.
20873 SDValue StoredVal = peekThroughBitcasts(St->getValue());
20874 const StoreSource StoreSrc = getStoreSource(StoredVal);
20875 if (StoreSrc == StoreSource::Unknown)
20876 return false;
20877
20878 SmallVector<MemOpLink, 8> StoreNodes;
20879 SDNode *RootNode;
20880 // Find potential store merge candidates by searching through chain sub-DAG
20881 getStoreMergeCandidates(St, StoreNodes, RootNode);
20882
20883 // Check if there is anything to merge.
20884 if (StoreNodes.size() < 2)
20885 return false;
20886
20887 // Sort the memory operands according to their distance from the
20888 // base pointer.
20889 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20890 return LHS.OffsetFromBase < RHS.OffsetFromBase;
20891 });
20892
20893 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20894 Attribute::NoImplicitFloat);
20895 bool IsNonTemporalStore = St->isNonTemporal();
20896 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20897 cast<LoadSDNode>(StoredVal)->isNonTemporal();
20898
20899 // Store merging attempts to merge the lowest stores first. This generally
20900 // works out fine if the merge is successful, as the remaining stores are
20901 // checked after the first collection of stores is merged. However, in the
20902 // case that a non-mergeable store is found first, e.g., {p[-2],
20903 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
20904 // mergeable cases. To prevent this, we prune such stores from the
20905 // front of StoreNodes here.
20906 bool MadeChange = false;
20907 while (StoreNodes.size() > 1) {
20908 unsigned NumConsecutiveStores =
20909 getConsecutiveStores(StoreNodes, ElementSizeBytes);
20910 // There are no more stores in the list to examine.
20911 if (NumConsecutiveStores == 0)
20912 return MadeChange;
20913
20914 // We have at least 2 consecutive stores. Try to merge them.
20915 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20916 switch (StoreSrc) {
20917 case StoreSource::Constant:
20918 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20919 MemVT, RootNode, AllowVectors);
20920 break;
20921
20922 case StoreSource::Extract:
20923 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20924 MemVT, RootNode);
20925 break;
20926
20927 case StoreSource::Load:
20928 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20929 MemVT, RootNode, AllowVectors,
20930 IsNonTemporalStore, IsNonTemporalLoad);
20931 break;
20932
20933 default:
20934 llvm_unreachable("Unhandled store source type");
20935 }
20936 }
20937 return MadeChange;
20938}
20939
20940SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20941 SDLoc SL(ST);
20942 SDValue ReplStore;
20943
20944 // Replace the chain to avoid dependency.
20945 if (ST->isTruncatingStore()) {
20946 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20947 ST->getBasePtr(), ST->getMemoryVT(),
20948 ST->getMemOperand());
20949 } else {
20950 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20951 ST->getMemOperand());
20952 }
20953
20954 // Create token to keep both nodes around.
20955 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
20956 MVT::Other, ST->getChain(), ReplStore);
20957
20958 // Make sure the new and old chains are cleaned up.
20959 AddToWorklist(Token.getNode());
20960
20961 // Don't add users to work list.
20962 return CombineTo(ST, Token, false);
20963}
20964
20965SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
20966 SDValue Value = ST->getValue();
20967 if (Value.getOpcode() == ISD::TargetConstantFP)
20968 return SDValue();
20969
20970 if (!ISD::isNormalStore(ST))
20971 return SDValue();
20972
20973 SDLoc DL(ST);
20974
20975 SDValue Chain = ST->getChain();
20976 SDValue Ptr = ST->getBasePtr();
20977
20978 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
20979
20980 // NOTE: If the original store is volatile, this transform must not increase
20981 // the number of stores. For example, on x86-32 an f64 can be stored in one
20982 // processor operation but an i64 (which is not legal) requires two. So the
20983 // transform should not be done in this case.
20984
20985 SDValue Tmp;
20986 switch (CFP->getSimpleValueType(0).SimpleTy) {
20987 default:
20988 llvm_unreachable("Unknown FP type");
20989 case MVT::f16: // We don't do this for these yet.
20990 case MVT::bf16:
20991 case MVT::f80:
20992 case MVT::f128:
20993 case MVT::ppcf128:
20994 return SDValue();
20995 case MVT::f32:
20996 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
20997 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
20998 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
20999 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21000 MVT::i32);
21001 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21002 }
21003
21004 return SDValue();
21005 case MVT::f64:
21006 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21007 ST->isSimple()) ||
21008 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21009 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21010 getZExtValue(), SDLoc(CFP), MVT::i64);
21011 return DAG.getStore(Chain, DL, Tmp,
21012 Ptr, ST->getMemOperand());
21013 }
21014
21015 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21016 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21017 // Many FP stores are not made apparent until after legalize, e.g. for
21018 // argument passing. Since this is so common, custom legalize the
21019 // 64-bit integer store into two 32-bit stores.
21020 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21021 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21022 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21023 if (DAG.getDataLayout().isBigEndian())
21024 std::swap(Lo, Hi);
21025
21026 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21027 AAMDNodes AAInfo = ST->getAAInfo();
21028
21029 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21030 ST->getOriginalAlign(), MMOFlags, AAInfo);
21031 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21032 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21033 ST->getPointerInfo().getWithOffset(4),
21034 ST->getOriginalAlign(), MMOFlags, AAInfo);
21035 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21036 St0, St1);
21037 }
21038
21039 return SDValue();
21040 }
21041}
21042
21043// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21044//
21045// If a store of a load with an element inserted into it has no other
21046// uses in between the chain, then we can consider the vector store
21047// dead and replace it with just the single scalar element store.
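// E.g. (illustrative): storing (insert_vector_elt (v4i32 (load p)), x, 2) back
// to p becomes a single i32 store of x to p+8, since only element 2 changes.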
21048SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21049 SDLoc DL(ST);
21050 SDValue Value = ST->getValue();
21051 SDValue Ptr = ST->getBasePtr();
21052 SDValue Chain = ST->getChain();
21053 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21054 return SDValue();
21055
21056 SDValue Elt = Value.getOperand(1);
21057 SDValue Idx = Value.getOperand(2);
21058
21059 // If the element isn't byte sized or is implicitly truncated then we can't
21060 // compute an offset.
21061 EVT EltVT = Elt.getValueType();
21062 if (!EltVT.isByteSized() ||
21063 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21064 return SDValue();
21065
21066 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21067 if (!Ld || Ld->getBasePtr() != Ptr ||
21068 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21069 !ISD::isNormalStore(ST) ||
21070 Ld->getAddressSpace() != ST->getAddressSpace() ||
21071 !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21072 return SDValue();
21073
21074 unsigned IsFast;
21075 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21076 Elt.getValueType(), ST->getAddressSpace(),
21077 ST->getAlign(), ST->getMemOperand()->getFlags(),
21078 &IsFast) ||
21079 !IsFast)
21080 return SDValue();
21081
21082 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21083
21084 // If the offset is a known constant then try to recover the pointer
21085 // info
21086 SDValue NewPtr;
21087 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21088 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21089 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21090 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21091 } else {
21092 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21093 }
21094
21095 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21096 ST->getMemOperand()->getFlags());
21097}
21098
21099SDValue DAGCombiner::visitSTORE(SDNode *N) {
21100 StoreSDNode *ST = cast<StoreSDNode>(N);
21101 SDValue Chain = ST->getChain();
21102 SDValue Value = ST->getValue();
21103 SDValue Ptr = ST->getBasePtr();
21104
21105 // If this is a store of a bit convert, store the input value if the
21106 // resultant store does not need a higher alignment than the original.
21107 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21108 ST->isUnindexed()) {
21109 EVT SVT = Value.getOperand(0).getValueType();
21110 // If the store is volatile, we only want to change the store type if the
21111 // resulting store is legal. Otherwise we might increase the number of
21112 // memory accesses. We don't care if the original type was legal or not
21113 // as we assume software couldn't rely on the number of accesses of an
21114 // illegal type.
21115 // TODO: May be able to relax for unordered atomics (see D66309)
21116 if (((!LegalOperations && ST->isSimple()) ||
21117 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21118 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21119 DAG, *ST->getMemOperand())) {
21120 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21121 ST->getMemOperand());
21122 }
21123 }
21124
21125 // Turn 'store undef, Ptr' -> nothing.
21126 if (Value.isUndef() && ST->isUnindexed())
21127 return Chain;
21128
21129 // Try to infer better alignment information than the store already has.
21130 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21131 !ST->isAtomic()) {
21132 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21133 if (*Alignment > ST->getAlign() &&
21134 isAligned(*Alignment, ST->getSrcValueOffset())) {
21135 SDValue NewStore =
21136 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21137 ST->getMemoryVT(), *Alignment,
21138 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21139 // NewStore will always be N as we are only refining the alignment
21140 assert(NewStore.getNode() == N);
21141 (void)NewStore;
21142 }
21143 }
21144 }
21145
21146 // Try transforming a pair of floating point load / store ops to integer
21147 // load / store ops.
21148 if (SDValue NewST = TransformFPLoadStorePair(N))
21149 return NewST;
21150
21151 // Try transforming several stores into STORE (BSWAP).
21152 if (SDValue Store = mergeTruncStores(ST))
21153 return Store;
21154
21155 if (ST->isUnindexed()) {
21156 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21157 // adjacent stores.
21158 if (findBetterNeighborChains(ST)) {
21159 // replaceStoreChain uses CombineTo, which handled all of the worklist
21160 // manipulation. Return the original node to not do anything else.
21161 return SDValue(ST, 0);
21162 }
21163 Chain = ST->getChain();
21164 }
21165
21166 // FIXME: is there such a thing as a truncating indexed store?
21167 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21168 Value.getValueType().isInteger() &&
21169 (!isa<ConstantSDNode>(Value) ||
21170 !cast<ConstantSDNode>(Value)->isOpaque())) {
21171 // Convert a truncating store of an extension into a standard store.
21172 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21173 Value.getOpcode() == ISD::SIGN_EXTEND ||
21174 Value.getOpcode() == ISD::ANY_EXTEND) &&
21175 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21176 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21177 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21178 ST->getMemOperand());
21179
21180 APInt TruncDemandedBits =
21181 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21182 ST->getMemoryVT().getScalarSizeInBits());
21183
21184 // See if we can simplify the operation with SimplifyDemandedBits, which
21185 // only works if the value has a single use.
21186 AddToWorklist(Value.getNode());
21187 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21188 // Re-visit the store if anything changed and the store hasn't been merged
21189 // with another node (N is deleted). SimplifyDemandedBits will add Value's
21190 // node back to the worklist if necessary, but we also need to re-visit
21191 // the Store node itself.
21192 if (N->getOpcode() != ISD::DELETED_NODE)
21193 AddToWorklist(N);
21194 return SDValue(N, 0);
21195 }
21196
21197 // Otherwise, see if we can simplify the input to this truncstore with
21198 // knowledge that only the low bits are being used. For example:
21199 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21200 if (SDValue Shorter =
21201 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21202 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21203 ST->getMemOperand());
21204
21205 // If we're storing a truncated constant, see if we can simplify it.
21206 // TODO: Move this to targetShrinkDemandedConstant?
21207 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21208 if (!Cst->isOpaque()) {
21209 const APInt &CValue = Cst->getAPIntValue();
21210 APInt NewVal = CValue & TruncDemandedBits;
21211 if (NewVal != CValue) {
21212 SDValue Shorter =
21213 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21214 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21215 ST->getMemoryVT(), ST->getMemOperand());
21216 }
21217 }
21218 }
21219
21220 // If this is a load followed by a store to the same location, then the store
21221 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21222 // TODO: Add big-endian truncate support with test coverage.
21223 // TODO: Can relax for unordered atomics (see D66309)
21224 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21225 ? peekThroughTruncates(Value)
21226 : Value;
21227 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21228 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21229 ST->isUnindexed() && ST->isSimple() &&
21230 Ld->getAddressSpace() == ST->getAddressSpace() &&
21231 // There can't be any side effects between the load and store, such as
21232 // a call or store.
21233 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21234 // The store is dead, remove it.
21235 return Chain;
21236 }
21237 }
21238
21239 // Try scalarizing vector stores of loads where we only change one element
21240 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21241 return NewST;
21242
21243 // TODO: Can relax for unordered atomics (see D66309)
21244 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21245 if (ST->isUnindexed() && ST->isSimple() &&
21246 ST1->isUnindexed() && ST1->isSimple()) {
21247 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21248 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21249 ST->getAddressSpace() == ST1->getAddressSpace()) {
21250 // If this is a store followed by a store with the same value to the
21251 // same location, then the store is dead/noop.
21252 return Chain;
21253 }
21254
21255 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21256 !ST1->getBasePtr().isUndef() &&
21257 ST->getAddressSpace() == ST1->getAddressSpace()) {
21258 // If we consider two stores and one smaller in size is a scalable
21259 // vector type and another one a bigger size store with a fixed type,
21260 // then we could not allow the scalable store removal because we don't
21261 // know its final size in the end.
21262 if (ST->getMemoryVT().isScalableVector() ||
21263 ST1->getMemoryVT().isScalableVector()) {
21264 if (ST1->getBasePtr() == Ptr &&
21265 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21266 ST->getMemoryVT().getStoreSize())) {
21267 CombineTo(ST1, ST1->getChain());
21268 return SDValue(N, 0);
21269 }
21270 } else {
21271 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21272 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21273 // If the preceding store writes to a subset of the current store's
21274 // location and no other node is chained to that store, we can
21275 // effectively drop the preceding store. Do not remove stores to undef
21276 // as they may be used as data sinks.
21277 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21278 ChainBase,
21279 ST1->getMemoryVT().getFixedSizeInBits())) {
21280 CombineTo(ST1, ST1->getChain());
21281 return SDValue(N, 0);
21282 }
21283 }
21284 }
21285 }
21286 }
21287
21288 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21289 // truncating store. We can do this even if this is already a truncstore.
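// Illustrative example: (store (truncate i32 %x to i16), p)
//   --> (truncstore i32 %x, p, i16)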
21290 if ((Value.getOpcode() == ISD::FP_ROUND ||
21291 Value.getOpcode() == ISD::TRUNCATE) &&
21292 Value->hasOneUse() && ST->isUnindexed() &&
21293 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21294 ST->getMemoryVT(), LegalOperations)) {
21295 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21296 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21297 }
21298
21299 // Always perform this optimization before types are legal. If the target
21300 // prefers, also try this after legalization to catch stores that were created
21301 // by intrinsics or other nodes.
21302 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21303 while (true) {
21304 // There can be multiple store sequences on the same chain.
21305 // Keep trying to merge store sequences until we are unable to do so
21306 // or until we merge the last store on the chain.
21307 bool Changed = mergeConsecutiveStores(ST);
21308 if (!Changed) break;
21309 // Return N as merge only uses CombineTo and no worklist clean
21310 // up is necessary.
21311 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21312 return SDValue(N, 0);
21313 }
21314 }
21315
21316 // Try transforming N to an indexed store.
21317 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21318 return SDValue(N, 0);
21319
21320 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21321 //
21322 // Make sure to do this only after attempting to merge stores in order to
21323 // avoid changing the types of some subset of stores due to visit order,
21324 // preventing their merging.
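// For example, 'store f32 1.0, Ptr' can become 'store i32 0x3F800000, Ptr'
// (exact f32 bit pattern; whether the transform fires depends on the legality
// checks in replaceStoreOfFPConstant).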
21325 if (isa<ConstantFPSDNode>(ST->getValue())) {
21326 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21327 return NewSt;
21328 }
21329
21330 if (SDValue NewSt = splitMergedValStore(ST))
21331 return NewSt;
21332
21333 return ReduceLoadOpStoreWidth(N);
21334}
21335
21336SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21337 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21338 if (!LifetimeEnd->hasOffset())
21339 return SDValue();
21340
21341 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21342 LifetimeEnd->getOffset(), false);
21343
21344 // We walk up the chains to find stores.
21345 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21346 while (!Chains.empty()) {
21347 SDValue Chain = Chains.pop_back_val();
21348 if (!Chain.hasOneUse())
21349 continue;
21350 switch (Chain.getOpcode()) {
21351 case ISD::TokenFactor:
21352 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21353 Chains.push_back(Chain.getOperand(--Nops));
21354 break;
21355 case ISD::LIFETIME_START:
21356 case ISD::LIFETIME_END:
21357 // We can forward past any lifetime start/end that can be proven not to
21358 // alias the node.
21359 if (!mayAlias(Chain.getNode(), N))
21360 Chains.push_back(Chain.getOperand(0));
21361 break;
21362 case ISD::STORE: {
21363 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21364 // TODO: Can relax for unordered atomics (see D66309)
21365 if (!ST->isSimple() || ST->isIndexed())
21366 continue;
21367 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21368 // The bounds of a scalable store are not known until runtime, so this
21369 // store cannot be elided.
21370 if (StoreSize.isScalable())
21371 continue;
21372 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21373 // If we store purely within object bounds just before its lifetime ends,
21374 // we can remove the store.
21375 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21376 StoreSize.getFixedValue() * 8)) {
21377 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21378 dbgs() << "\nwithin LIFETIME_END of : ";
21379 LifetimeEndBase.dump(); dbgs() << "\n");
21380 CombineTo(ST, ST->getChain());
21381 return SDValue(N, 0);
21382 }
21383 }
21384 }
21385 }
21386 return SDValue();
21387}
21388
21389/// For the instruction sequence of store below, F and I values
21390/// are bundled together as an i64 value before being stored into memory.
21391 /// Sometimes it is more efficient to generate separate stores for F and I,
21392/// which can remove the bitwise instructions or sink them to colder places.
21393///
21394/// (store (or (zext (bitcast F to i32) to i64),
21395/// (shl (zext I to i64), 32)), addr) -->
21396/// (store F, addr) and (store I, addr+4)
21397///
21398/// Similarly, splitting for other merged store can also be beneficial, like:
21399/// For pair of {i32, i32}, i64 store --> two i32 stores.
21400/// For pair of {i32, i16}, i64 store --> two i32 stores.
21401/// For pair of {i16, i16}, i32 store --> two i16 stores.
21402/// For pair of {i16, i8}, i32 store --> two i16 stores.
21403/// For pair of {i8, i8}, i16 store --> two i8 stores.
21404///
21405/// We allow each target to determine specifically which kind of splitting is
21406/// supported.
21407///
21408/// The store patterns are commonly seen from the simple code snippet below
21409 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21410/// void goo(const std::pair<int, float> &);
21411/// hoo() {
21412/// ...
21413/// goo(std::make_pair(tmp, ftmp));
21414/// ...
21415/// }
21416///
21417SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21418 if (OptLevel == CodeGenOptLevel::None)
21419 return SDValue();
21420
21421 // Can't change the number of memory accesses for a volatile store or break
21422 // atomicity for an atomic one.
21423 if (!ST->isSimple())
21424 return SDValue();
21425
21426 SDValue Val = ST->getValue();
21427 SDLoc DL(ST);
21428
21429 // Match OR operand.
21430 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21431 return SDValue();
21432
21433 // Match SHL operand and get Lower and Higher parts of Val.
21434 SDValue Op1 = Val.getOperand(0);
21435 SDValue Op2 = Val.getOperand(1);
21436 SDValue Lo, Hi;
21437 if (Op1.getOpcode() != ISD::SHL) {
21438 std::swap(Op1, Op2);
21439 if (Op1.getOpcode() != ISD::SHL)
21440 return SDValue();
21441 }
21442 Lo = Op2;
21443 Hi = Op1.getOperand(0);
21444 if (!Op1.hasOneUse())
21445 return SDValue();
21446
21447 // Match shift amount to HalfValBitSize.
21448 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21449 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21450 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21451 return SDValue();
21452
21453 // Lo and Hi are zero-extended from integers whose size is less than or
21454 // equal to HalfValBitSize (e.g. i32 values for an i64 store).
21455 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21456 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21457 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21458 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21459 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21460 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21461 return SDValue();
21462
21463 // Use the EVT of low and high parts before bitcast as the input
21464 // of target query.
21465 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21466 ? Lo.getOperand(0).getValueType()
21467 : Lo.getValueType();
21468 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21469 ? Hi.getOperand(0).getValueType()
21470 : Hi.getValueType();
21471 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21472 return SDValue();
21473
21474 // Start to split store.
21475 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21476 AAMDNodes AAInfo = ST->getAAInfo();
21477
21478 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21479 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21480 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21481 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21482
21483 SDValue Chain = ST->getChain();
21484 SDValue Ptr = ST->getBasePtr();
21485 // Lower value store.
21486 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21487 ST->getOriginalAlign(), MMOFlags, AAInfo);
21488 Ptr =
21489 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21490 // Higher value store.
21491 SDValue St1 = DAG.getStore(
21492 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21493 ST->getOriginalAlign(), MMOFlags, AAInfo);
21494 return St1;
21495}
21496
21497// Merge an insertion into an existing shuffle:
21498// (insert_vector_elt (vector_shuffle X, Y, Mask),
21499 // (extract_vector_elt X, N), InsIndex)
21500// --> (vector_shuffle X, Y, NewMask)
21501// and variations where shuffle operands may be CONCAT_VECTORS.
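// Illustrative example (v4i32 operands assumed):
//   (insert_vector_elt (vector_shuffle X, Y, <0,5,2,7>),
//                      (extract_vector_elt X, 1), 3)
//   --> (vector_shuffle X, Y, <0,5,2,1>)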
21502 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21503 SmallVectorImpl<int> &NewMask, SDValue Elt,
21504 unsigned InsIndex) {
21505 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21506 !isa<ConstantSDNode>(Elt.getOperand(1)))
21507 return false;
21508
21509 // Vec's operand 0 is using indices from 0 to N-1 and
21510 // operand 1 from N to 2N - 1, where N is the number of
21511 // elements in the vectors.
21512 SDValue InsertVal0 = Elt.getOperand(0);
21513 int ElementOffset = -1;
21514
21515 // We explore the inputs of the shuffle in order to see if we find the
21516 // source of the extract_vector_elt. If so, we can use it to modify the
21517 // shuffle rather than perform an insert_vector_elt.
21518 SmallVector<std::pair<int, SDValue>> ArgWorkList;
21519 ArgWorkList.emplace_back(Mask.size(), Y);
21520 ArgWorkList.emplace_back(0, X);
21521
21522 while (!ArgWorkList.empty()) {
21523 int ArgOffset;
21524 SDValue ArgVal;
21525 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21526
21527 if (ArgVal == InsertVal0) {
21528 ElementOffset = ArgOffset;
21529 break;
21530 }
21531
21532 // Peek through concat_vector.
21533 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21534 int CurrentArgOffset =
21535 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21536 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21537 for (SDValue Op : reverse(ArgVal->ops())) {
21538 CurrentArgOffset -= Step;
21539 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21540 }
21541
21542 // Make sure we went through all the elements and did not screw up index
21543 // computation.
21544 assert(CurrentArgOffset == ArgOffset);
21545 }
21546 }
21547
21548 // If we failed to find a match, see if we can replace an UNDEF shuffle
21549 // operand.
21550 if (ElementOffset == -1) {
21551 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21552 return false;
21553 ElementOffset = Mask.size();
21554 Y = InsertVal0;
21555 }
21556
21557 NewMask.assign(Mask.begin(), Mask.end());
21558 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21559 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21560 "NewMask[InsIndex] is out of bound");
21561 return true;
21562}
21563
21564// Merge an insertion into an existing shuffle:
21565// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21566// InsIndex)
21567// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21568// CONCAT_VECTORS.
21569SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21570 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21571 "Expected insert_vector_elt");
21572 SDValue InsertVal = N->getOperand(1);
21573 SDValue Vec = N->getOperand(0);
21574
21575 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21576 if (!SVN || !Vec.hasOneUse())
21577 return SDValue();
21578
21579 ArrayRef<int> Mask = SVN->getMask();
21580 SDValue X = Vec.getOperand(0);
21581 SDValue Y = Vec.getOperand(1);
21582
21583 SmallVector<int, 16> NewMask(Mask);
21584 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21585 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21586 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21587 if (LegalShuffle)
21588 return LegalShuffle;
21589 }
21590
21591 return SDValue();
21592}
21593
21594// Convert a disguised subvector insertion into a shuffle:
21595// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21596// bitcast(shuffle (bitcast V), (extended X), Mask)
21597// Note: We do not use an insert_subvector node because that requires a
21598// legal subvector type.
21599SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21600 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21601 "Expected insert_vector_elt");
21602 SDValue InsertVal = N->getOperand(1);
21603
21604 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21605 !InsertVal.getOperand(0).getValueType().isVector())
21606 return SDValue();
21607
21608 SDValue SubVec = InsertVal.getOperand(0);
21609 SDValue DestVec = N->getOperand(0);
21610 EVT SubVecVT = SubVec.getValueType();
21611 EVT VT = DestVec.getValueType();
21612 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21613 // If the source only has a single vector element, the cost of creating and
21614 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
21615 if (NumSrcElts == 1)
21616 return SDValue();
21617 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21618 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21619
21620 // Step 1: Create a shuffle mask that implements this insert operation. The
21621 // vector that we are inserting into will be operand 0 of the shuffle, so
21622 // those elements are just 'i'. The inserted subvector is in the first
21623 // positions of operand 1 of the shuffle. Example:
21624 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21625 SmallVector<int, 16> Mask(NumMaskVals);
21626 for (unsigned i = 0; i != NumMaskVals; ++i) {
21627 if (i / NumSrcElts == InsIndex)
21628 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21629 else
21630 Mask[i] = i;
21631 }
21632
21633 // Bail out if the target can not handle the shuffle we want to create.
21634 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21635 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21636 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21637 return SDValue();
21638
21639 // Step 2: Create a wide vector from the inserted source vector by appending
21640 // undefined elements. This is the same size as our destination vector.
21641 SDLoc DL(N);
21642 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21643 ConcatOps[0] = SubVec;
21644 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21645
21646 // Step 3: Shuffle in the padded subvector.
21647 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21648 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21649 AddToWorklist(PaddedSubV.getNode());
21650 AddToWorklist(DestVecBC.getNode());
21651 AddToWorklist(Shuf.getNode());
21652 return DAG.getBitcast(VT, Shuf);
21653}
21654
21655 // Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21656 // possible and the new load will be fast. We use more loads but fewer
21657 // shuffles and inserts.
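// Illustrative example (v4i32, InsIndex == 0, pointer layout assumed):
//   x = load <4 x i32>, p     and     s = load i32, p - 4
//   insert_vector_elt (shuffle x, undef, <u,0,1,2>), s, 0
//     --> load <4 x i32>, p - 4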
21658SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21659 EVT VT = N->getValueType(0);
21660
21661 // InsIndex is expected to be the first or last lane.
21662 if (!VT.isFixedLengthVector() ||
21663 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21664 return SDValue();
21665
21666 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21667 // depending on the InsIndex.
21668 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21669 SDValue Scalar = N->getOperand(1);
21670 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21671 return InsIndex == P.index() || P.value() < 0 ||
21672 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21673 (InsIndex == VT.getVectorNumElements() - 1 &&
21674 P.value() == (int)P.index() + 1);
21675 }))
21676 return SDValue();
21677
21678 // We optionally skip over an extend so long as both loads are extended in the
21679 // same way from the same type.
21680 unsigned Extend = 0;
21681 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21682 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21683 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21684 Extend = Scalar.getOpcode();
21685 Scalar = Scalar.getOperand(0);
21686 }
21687
21688 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21689 if (!ScalarLoad)
21690 return SDValue();
21691
21692 SDValue Vec = Shuffle->getOperand(0);
21693 if (Extend) {
21694 if (Vec.getOpcode() != Extend)
21695 return SDValue();
21696 Vec = Vec.getOperand(0);
21697 }
21698 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21699 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21700 return SDValue();
21701
21702 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21703 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21704 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21705 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21706 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21707 return SDValue();
21708
21709 // Check that the offset between the pointers allows producing a single
21710 // contiguous load.
21711 if (InsIndex == 0) {
21712 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21713 -1))
21714 return SDValue();
21715 } else {
21716 if (!DAG.areNonVolatileConsecutiveLoads(
21717 VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21718 return SDValue();
21719 }
21720
21721 // And that the new unaligned load will be fast.
21722 unsigned IsFast = 0;
21723 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21724 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21725 Vec.getValueType(), VecLoad->getAddressSpace(),
21726 NewAlign, VecLoad->getMemOperand()->getFlags(),
21727 &IsFast) ||
21728 !IsFast)
21729 return SDValue();
21730
21731 // Calculate the new Ptr and create the new load.
21732 SDLoc DL(N);
21733 SDValue Ptr = ScalarLoad->getBasePtr();
21734 if (InsIndex != 0)
21735 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21736 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21737 MachinePointerInfo PtrInfo =
21738 InsIndex == 0 ? ScalarLoad->getPointerInfo()
21739 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21740
21741 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21742 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21743 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21744 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21745 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21746}
21747
21748SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21749 SDValue InVec = N->getOperand(0);
21750 SDValue InVal = N->getOperand(1);
21751 SDValue EltNo = N->getOperand(2);
21752 SDLoc DL(N);
21753
21754 EVT VT = InVec.getValueType();
21755 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21756
21757 // Insert into out-of-bounds element is undefined.
21758 if (IndexC && VT.isFixedLengthVector() &&
21759 IndexC->getZExtValue() >= VT.getVectorNumElements())
21760 return DAG.getUNDEF(VT);
21761
21762 // Remove redundant insertions:
21763 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21764 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21765 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21766 return InVec;
21767
21768 if (!IndexC) {
21769 // If this is variable insert to undef vector, it might be better to splat:
21770 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21771 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21772 return DAG.getSplat(VT, DL, InVal);
21773 return SDValue();
21774 }
21775
21776 if (VT.isScalableVector())
21777 return SDValue();
21778
21779 unsigned NumElts = VT.getVectorNumElements();
21780
21781 // We must know which element is being inserted for folds below here.
21782 unsigned Elt = IndexC->getZExtValue();
21783
21784 // Handle <1 x ???> vector insertion special cases.
21785 if (NumElts == 1) {
21786 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21787 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21788 InVal.getOperand(0).getValueType() == VT &&
21789 isNullConstant(InVal.getOperand(1)))
21790 return InVal.getOperand(0);
21791 }
21792
21793 // Canonicalize insert_vector_elt dag nodes.
21794 // Example:
21795 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21796 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21797 //
21798 // Do this only if the child insert_vector node has one use; also
21799 // do this only if indices are both constants and Idx1 < Idx0.
21800 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21801 && isa<ConstantSDNode>(InVec.getOperand(2))) {
21802 unsigned OtherElt = InVec.getConstantOperandVal(2);
21803 if (Elt < OtherElt) {
21804 // Swap nodes.
21805 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21806 InVec.getOperand(0), InVal, EltNo);
21807 AddToWorklist(NewOp.getNode());
21808 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21809 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21810 }
21811 }
21812
21813 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21814 return Shuf;
21815
21816 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21817 return Shuf;
21818
21819 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21820 return Shuf;
21821
21822 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
21823 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
21824 // vXi1 vector - we don't need to recurse.
21825 if (NumElts == 1)
21826 return DAG.getBuildVector(VT, DL, {InVal});
21827
21828 // If we haven't already collected the element, insert into the op list.
21829 EVT MaxEltVT = InVal.getValueType();
21830 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21831 unsigned Idx) {
21832 if (!Ops[Idx]) {
21833 Ops[Idx] = Elt;
21834 if (VT.isInteger()) {
21835 EVT EltVT = Elt.getValueType();
21836 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21837 }
21838 }
21839 };
21840
21841 // Ensure all the operands are the same value type, fill any missing
21842 // operands with UNDEF and create the BUILD_VECTOR.
21843 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21844 assert(Ops.size() == NumElts && "Unexpected vector size");
21845 for (SDValue &Op : Ops) {
21846 if (Op)
21847 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21848 else
21849 Op = DAG.getUNDEF(MaxEltVT);
21850 }
21851 return DAG.getBuildVector(VT, DL, Ops);
21852 };
21853
21854 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21855 Ops[Elt] = InVal;
21856
21857 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
21858 for (SDValue CurVec = InVec; CurVec;) {
21859 // UNDEF - build new BUILD_VECTOR from already inserted operands.
21860 if (CurVec.isUndef())
21861 return CanonicalizeBuildVector(Ops);
21862
21863 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21864 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21865 for (unsigned I = 0; I != NumElts; ++I)
21866 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21867 return CanonicalizeBuildVector(Ops);
21868 }
21869
21870 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21871 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21872 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21873 return CanonicalizeBuildVector(Ops);
21874 }
21875
21876 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21877 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21878 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21879 if (CurIdx->getAPIntValue().ult(NumElts)) {
21880 unsigned Idx = CurIdx->getZExtValue();
21881 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21882
21883 // Found entire BUILD_VECTOR.
21884 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21885 return CanonicalizeBuildVector(Ops);
21886
21887 CurVec = CurVec->getOperand(0);
21888 continue;
21889 }
21890
21891 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21892 // update the shuffle mask (and second operand if we started with unary
21893 // shuffle) and create a new legal shuffle.
21894 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21895 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21896 SDValue LHS = SVN->getOperand(0);
21897 SDValue RHS = SVN->getOperand(1);
21898 SmallVector<int, 16> Mask(SVN->getMask());
21899 bool Merged = true;
21900 for (auto I : enumerate(Ops)) {
21901 SDValue &Op = I.value();
21902 if (Op) {
21903 SmallVector<int, 16> NewMask;
21904 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21905 Merged = false;
21906 break;
21907 }
21908 Mask = std::move(NewMask);
21909 }
21910 }
21911 if (Merged)
21912 if (SDValue NewShuffle =
21913 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21914 return NewShuffle;
21915 }
21916
21917 // If all insertions are zero value, try to convert to AND mask.
21918 // TODO: Do this for -1 with OR mask?
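// Illustrative example (v4i32): inserting zero into lanes 1 and 3 of X
// becomes (and X, (build_vector -1, 0, -1, 0)).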
21919 if (!LegalOperations && llvm::isNullConstant(InVal) &&
21920 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21921 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21922 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21923 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21924 SmallVector<SDValue, 8> Mask(NumElts);
21925 for (unsigned I = 0; I != NumElts; ++I)
21926 Mask[I] = Ops[I] ? Zero : AllOnes;
21927 return DAG.getNode(ISD::AND, DL, VT, CurVec,
21928 DAG.getBuildVector(VT, DL, Mask));
21929 }
21930
21931 // Failed to find a match in the chain - bail.
21932 break;
21933 }
21934
21935 // See if we can fill in the missing constant elements as zeros.
21936 // TODO: Should we do this for any constant?
21937 APInt DemandedZeroElts = APInt::getZero(NumElts);
21938 for (unsigned I = 0; I != NumElts; ++I)
21939 if (!Ops[I])
21940 DemandedZeroElts.setBit(I);
21941
21942 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
21943 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
21944 : DAG.getConstantFP(0, DL, MaxEltVT);
21945 for (unsigned I = 0; I != NumElts; ++I)
21946 if (!Ops[I])
21947 Ops[I] = Zero;
21948
21949 return CanonicalizeBuildVector(Ops);
21950 }
21951 }
21952
21953 return SDValue();
21954}
21955
21956SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
21957 SDValue EltNo,
21958 LoadSDNode *OriginalLoad) {
21959 assert(OriginalLoad->isSimple());
21960
21961 EVT ResultVT = EVE->getValueType(0);
21962 EVT VecEltVT = InVecVT.getVectorElementType();
21963
21964 // If the vector element type is not a multiple of a byte then we are unable
21965 // to correctly compute an address to load only the extracted element as a
21966 // scalar.
21967 if (!VecEltVT.isByteSized())
21968 return SDValue();
21969
21970 ISD::LoadExtType ExtTy =
21971 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
21972 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
21973 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
21974 return SDValue();
21975
21976 Align Alignment = OriginalLoad->getAlign();
21977 MachinePointerInfo MPI;
21978 SDLoc DL(EVE);
21979 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
21980 int Elt = ConstEltNo->getZExtValue();
21981 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
21982 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
21983 Alignment = commonAlignment(Alignment, PtrOff);
21984 } else {
21985 // Discard the pointer info except the address space because the memory
21986 // operand can't represent this new access since the offset is variable.
21987 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
21988 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
21989 }
21990
21991 unsigned IsFast = 0;
21992 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
21993 OriginalLoad->getAddressSpace(), Alignment,
21994 OriginalLoad->getMemOperand()->getFlags(),
21995 &IsFast) ||
21996 !IsFast)
21997 return SDValue();
21998
21999 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22000 InVecVT, EltNo);
22001
22002 // We are replacing a vector load with a scalar load. The new load must have
22003 // identical memory op ordering to the original.
22004 SDValue Load;
22005 if (ResultVT.bitsGT(VecEltVT)) {
22006 // If the result type of vextract is wider than the load, then issue an
22007 // extending load instead.
22008 ISD::LoadExtType ExtType =
22009 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22010 : ISD::EXTLOAD;
22011 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22012 NewPtr, MPI, VecEltVT, Alignment,
22013 OriginalLoad->getMemOperand()->getFlags(),
22014 OriginalLoad->getAAInfo());
22015 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22016 } else {
22017 // The result type is narrower or the same width as the vector element
22018 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22019 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22020 OriginalLoad->getAAInfo());
22021 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22022 if (ResultVT.bitsLT(VecEltVT))
22023 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22024 else
22025 Load = DAG.getBitcast(ResultVT, Load);
22026 }
22027 ++OpsNarrowed;
22028 return Load;
22029}
22030
22031/// Transform a vector binary operation into a scalar binary operation by moving
22032/// the math/logic after an extract element of a vector.
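/// Illustrative example (constant vector operand assumed):
///   extractelt (add X, <1,2,3,4>), 2 --> add (extractelt X, 2), 3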
22033 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22034 bool LegalOperations) {
22035 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22036 SDValue Vec = ExtElt->getOperand(0);
22037 SDValue Index = ExtElt->getOperand(1);
22038 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22039 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22040 Vec->getNumValues() != 1)
22041 return SDValue();
22042
22043 // Targets may want to avoid this to prevent an expensive register transfer.
22044 if (!TLI.shouldScalarizeBinop(Vec))
22045 return SDValue();
22046
22047 // Extracting an element of a vector constant is constant-folded, so this
22048 // transform is just replacing a vector op with a scalar op while moving the
22049 // extract.
22050 SDValue Op0 = Vec.getOperand(0);
22051 SDValue Op1 = Vec.getOperand(1);
22052 APInt SplatVal;
22053 if (isAnyConstantBuildVector(Op0, true) ||
22054 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22055 isAnyConstantBuildVector(Op1, true) ||
22056 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22057 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22058 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22059 SDLoc DL(ExtElt);
22060 EVT VT = ExtElt->getValueType(0);
22061 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22062 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22063 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22064 }
22065
22066 return SDValue();
22067}
22068
22069 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22070 // recursively analyse all of its users, and try to model them as
22071 // bit sequence extractions. If all of them agree on the new, narrower element
22072// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22073// new element type, do so now.
22074 // This is mainly useful to recover from legalization that scalarized
22075 // the vector as wide elements; this combine tries to rebuild it with narrower elements.
22076//
22077// Some more nodes could be modelled if that helps cover interesting patterns.
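// Illustrative example (little-endian; assuming the narrowed values only feed
// build_vector nodes):
//   %w  = extract_vector_elt <2 x i64> %v, 0
//   %lo = truncate %w to i32
//   %hi = truncate (srl %w, 32) to i32
// can be rebuilt with narrower elements as:
//   %nv = bitcast %v to <4 x i32>
//   %lo = extract_vector_elt %nv, 0
//   %hi = extract_vector_elt %nv, 1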
22078bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22079 SDNode *N) {
22080 // We perform this optimization post type-legalization because
22081 // the type-legalizer often scalarizes integer-promoted vectors.
22082 // Performing this optimization before may cause legalization cycles.
22083 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22084 return false;
22085
22086 // TODO: Add support for big-endian.
22087 if (DAG.getDataLayout().isBigEndian())
22088 return false;
22089
22090 SDValue VecOp = N->getOperand(0);
22091 EVT VecVT = VecOp.getValueType();
22092 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22093
22094 // We must start with a constant extraction index.
22095 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22096 if (!IndexC)
22097 return false;
22098
22099 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22100 "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
22101
22102 // TODO: deal with the case of implicit anyext of the extraction.
22103 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22104 EVT ScalarVT = N->getValueType(0);
22105 if (VecVT.getScalarType() != ScalarVT)
22106 return false;
22107
22108 // TODO: deal with the cases other than everything being integer-typed.
22109 if (!ScalarVT.isScalarInteger())
22110 return false;
22111
22112 struct Entry {
22113 SDNode *Producer;
22114
22115 // Which bits of VecOp does it contain?
22116 unsigned BitPos;
22117 int NumBits;
22118 // NOTE: the actual width of \p Producer may be wider than NumBits!
22119
22120 Entry(Entry &&) = default;
22121 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22122 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22123
22124 Entry() = delete;
22125 Entry(const Entry &) = delete;
22126 Entry &operator=(const Entry &) = delete;
22127 Entry &operator=(Entry &&) = delete;
22128 };
22129 SmallVector<Entry, 32> Worklist;
22130 SmallVector<Entry, 32> Leafs;
22131
22132 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22133 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22134 /*NumBits=*/VecEltBitWidth);
22135
22136 while (!Worklist.empty()) {
22137 Entry E = Worklist.pop_back_val();
22138 // Does the node not even use any of the VecOp bits?
22139 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22140 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22141 return false; // Let's allow the other combines clean this up first.
22142 // Did we fail to model any of the users of the Producer?
22143 bool ProducerIsLeaf = false;
22144 // Look at each user of this Producer.
22145 for (SDNode *User : E.Producer->uses()) {
22146 switch (User->getOpcode()) {
22147 // TODO: support ISD::BITCAST
22148 // TODO: support ISD::ANY_EXTEND
22149 // TODO: support ISD::ZERO_EXTEND
22150 // TODO: support ISD::SIGN_EXTEND
22151 case ISD::TRUNCATE:
22152 // Truncation simply means we keep position, but extract less bits.
22153 Worklist.emplace_back(User, E.BitPos,
22154 /*NumBits=*/User->getValueSizeInBits(0));
22155 break;
22156 // TODO: support ISD::SRA
22157 // TODO: support ISD::SHL
22158 case ISD::SRL:
22159 // We should be shifting the Producer by a constant amount.
22160 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22161 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22162 // Logical right-shift means that we start extraction later,
22163 // but stop it at the same position we did previously.
22164 unsigned ShAmt = ShAmtC->getZExtValue();
22165 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22166 break;
22167 }
22168 [[fallthrough]];
22169 default:
22170 // We can not model this user of the Producer.
22171 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22172 ProducerIsLeaf = true;
22173 // Profitability check: all users that we can not model
22174 // must be ISD::BUILD_VECTOR's.
22175 if (User->getOpcode() != ISD::BUILD_VECTOR)
22176 return false;
22177 break;
22178 }
22179 }
22180 if (ProducerIsLeaf)
22181 Leafs.emplace_back(std::move(E));
22182 }
22183
22184 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22185
22186 // If we are still at the same element granularity, give up.
22187 if (NewVecEltBitWidth == VecEltBitWidth)
22188 return false;
22189
22190 // The vector width must be a multiple of the new element width.
22191 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22192 return false;
22193
22194 // All leafs must agree on the new element width.
22195 // All leafs must not expect any "padding" bits on top of that width.
22196 // All leafs must start extraction from multiple of that width.
22197 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22198 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22199 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22200 E.BitPos % NewVecEltBitWidth == 0;
22201 }))
22202 return false;
22203
22204 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22205 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22206 VecVT.getSizeInBits() / NewVecEltBitWidth);
22207
22208 if (LegalTypes &&
22209 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22210 return false;
22211
22212 if (LegalOperations &&
22213 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22214 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22215 return false;
22216
22217 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22218 for (const Entry &E : Leafs) {
22219 SDLoc DL(E.Producer);
22220 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22221 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22222 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22223 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22224 DAG.getVectorIdxConstant(NewIndex, DL));
22225 CombineTo(E.Producer, V);
22226 }
22227
22228 return true;
22229}
22230
22231SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22232 SDValue VecOp = N->getOperand(0);
22233 SDValue Index = N->getOperand(1);
22234 EVT ScalarVT = N->getValueType(0);
22235 EVT VecVT = VecOp.getValueType();
22236 if (VecOp.isUndef())
22237 return DAG.getUNDEF(ScalarVT);
22238
22239 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22240 //
22241 // This only really matters if the index is non-constant since other combines
22242 // on the constant elements already work.
22243 SDLoc DL(N);
22244 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22245 Index == VecOp.getOperand(2)) {
22246 SDValue Elt = VecOp.getOperand(1);
22247 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22248 }
22249
22250 // (vextract (scalar_to_vector val), 0) -> val
22251 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22252 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22253 if (DAG.isKnownNeverZero(Index))
22254 return DAG.getUNDEF(ScalarVT);
22255
22256 // Check if the result type doesn't match the inserted element type.
22257 // The inserted element and extracted element may have mismatched bitwidth.
22258 // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22259 SDValue InOp = VecOp.getOperand(0);
22260 if (InOp.getValueType() != ScalarVT) {
22261 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22262 if (InOp.getValueType().bitsGT(ScalarVT))
22263 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22264 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22265 }
22266 return InOp;
22267 }
22268
22269 // extract_vector_elt of out-of-bounds element -> UNDEF
22270 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22271 if (IndexC && VecVT.isFixedLengthVector() &&
22272 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22273 return DAG.getUNDEF(ScalarVT);
22274
22275 // extract_vector_elt (build_vector x, y), 1 -> y
22276 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22277 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22278 TLI.isTypeLegal(VecVT)) {
22279 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22280 VecVT.isFixedLengthVector()) &&
22281 "BUILD_VECTOR used for scalable vectors");
22282 unsigned IndexVal =
22283 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22284 SDValue Elt = VecOp.getOperand(IndexVal);
22285 EVT InEltVT = Elt.getValueType();
22286
22287 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22288 isNullConstant(Elt)) {
22289 // Sometimes build_vector's scalar input types do not match result type.
22290 if (ScalarVT == InEltVT)
22291 return Elt;
22292
22293 // TODO: It may be useful to truncate if free if the build_vector
22294 // implicitly converts.
22295 }
22296 }
22297
22298 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22299 return BO;
22300
22301 if (VecVT.isScalableVector())
22302 return SDValue();
22303
22304 // All the code from this point onwards assumes fixed width vectors, but it's
22305 // possible that some of the combinations could be made to work for scalable
22306 // vectors too.
22307 unsigned NumElts = VecVT.getVectorNumElements();
22308 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22309
22310 // See if the extracted element is constant, in which case fold it if it's
22311 // a legal fp immediate.
22312 if (IndexC && ScalarVT.isFloatingPoint()) {
22313 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22314 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22315 if (KnownElt.isConstant()) {
22316 APFloat CstFP =
22317 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22318 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22319 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22320 }
22321 }
22322
22323 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22324 // there are regressions on multiple targets without it. We can end up with a
22325 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22326 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22327 VecOp.hasOneUse()) {
22328 // The vector index of the LSBs of the source depends on the endianness.
22329 bool IsLE = DAG.getDataLayout().isLittleEndian();
22330 unsigned ExtractIndex = IndexC->getZExtValue();
22331 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22332 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22333 SDValue BCSrc = VecOp.getOperand(0);
22334 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22335 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22336
22337 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22338 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22339 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22340 // trunc i64 X to i32
22341 SDValue X = BCSrc.getOperand(0);
22342 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22343 "Extract element and scalar to vector can't change element type "
22344 "from FP to integer.");
22345 unsigned XBitWidth = X.getValueSizeInBits();
22346 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22347
22348 // An extract element return value type can be wider than its vector
22349 // operand element type. In that case, the high bits are undefined, so
22350 // it's possible that we may need to extend rather than truncate.
22351 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22352 assert(XBitWidth % VecEltBitWidth == 0 &&
22353 "Scalar bitwidth must be a multiple of vector element bitwidth");
22354 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22355 }
22356 }
22357 }
22358
22359 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22360 // We only perform this optimization before the op legalization phase because
22361 // we may introduce new vector instructions which are not backed by TD
22362 // patterns. For example on AVX, extracting elements from a wide vector
22363 // without using extract_subvector. However, if we can find an underlying
22364 // scalar value, then we can always use that.
22365 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22366 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22367 // Find the new index to extract from.
22368 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22369
22370 // Extracting an undef index is undef.
22371 if (OrigElt == -1)
22372 return DAG.getUNDEF(ScalarVT);
22373
22374 // Select the right vector half to extract from.
22375 SDValue SVInVec;
22376 if (OrigElt < (int)NumElts) {
22377 SVInVec = VecOp.getOperand(0);
22378 } else {
22379 SVInVec = VecOp.getOperand(1);
22380 OrigElt -= NumElts;
22381 }
22382
22383 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22384 SDValue InOp = SVInVec.getOperand(OrigElt);
22385 if (InOp.getValueType() != ScalarVT) {
22386 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22387 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22388 }
22389
22390 return InOp;
22391 }
22392
22393 // FIXME: We should handle recursing on other vector shuffles and
22394 // scalar_to_vector here as well.
22395
22396 if (!LegalOperations ||
22397 // FIXME: Should really be just isOperationLegalOrCustom.
22398 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22399 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT))
22400 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22401 DAG.getVectorIdxConstant(OrigElt, DL));
22402 }
22403 }
22404
22405 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22406 // simplify it based on the (valid) extraction indices.
22407 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22408 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22409 Use->getOperand(0) == VecOp &&
22410 isa<ConstantSDNode>(Use->getOperand(1));
22411 })) {
22412 APInt DemandedElts = APInt::getZero(NumElts);
22413 for (SDNode *Use : VecOp->uses()) {
22414 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22415 if (CstElt->getAPIntValue().ult(NumElts))
22416 DemandedElts.setBit(CstElt->getZExtValue());
22417 }
22418 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22419 // We simplified the vector operand of this extract element. If this
22420 // extract is not dead, visit it again so it is folded properly.
22421 if (N->getOpcode() != ISD::DELETED_NODE)
22422 AddToWorklist(N);
22423 return SDValue(N, 0);
22424 }
22425 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22426 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22427 // We simplified the vector operand of this extract element. If this
22428 // extract is not dead, visit it again so it is folded properly.
22429 if (N->getOpcode() != ISD::DELETED_NODE)
22430 AddToWorklist(N);
22431 return SDValue(N, 0);
22432 }
22433 }
22434
22435 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22436 return SDValue(N, 0);
22437
22438 // Everything under here is trying to match an extract of a loaded value.
22439 // If the result of the load has to be truncated, then it's not necessarily
22440 // profitable.
22441 bool BCNumEltsChanged = false;
22442 EVT ExtVT = VecVT.getVectorElementType();
22443 EVT LVT = ExtVT;
22444 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22445 return SDValue();
22446
22447 if (VecOp.getOpcode() == ISD::BITCAST) {
22448 // Don't duplicate a load with other uses.
22449 if (!VecOp.hasOneUse())
22450 return SDValue();
22451
22452 EVT BCVT = VecOp.getOperand(0).getValueType();
22453 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22454 return SDValue();
22455 if (NumElts != BCVT.getVectorNumElements())
22456 BCNumEltsChanged = true;
22457 VecOp = VecOp.getOperand(0);
22458 ExtVT = BCVT.getVectorElementType();
22459 }
22460
22461 // extract (vector load $addr), i --> load $addr + i * size
22462 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22463 ISD::isNormalLoad(VecOp.getNode()) &&
22464 !Index->hasPredecessor(VecOp.getNode())) {
22465 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22466 if (VecLoad && VecLoad->isSimple())
22467 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22468 }
22469
22470 // Perform only after legalization to ensure build_vector / vector_shuffle
22471 // optimizations have already been done.
22472 if (!LegalOperations || !IndexC)
22473 return SDValue();
22474
22475 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22476 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22477 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22478 int Elt = IndexC->getZExtValue();
22479 LoadSDNode *LN0 = nullptr;
22480 if (ISD::isNormalLoad(VecOp.getNode())) {
22481 LN0 = cast<LoadSDNode>(VecOp);
22482 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22483 VecOp.getOperand(0).getValueType() == ExtVT &&
22484 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22485 // Don't duplicate a load with other uses.
22486 if (!VecOp.hasOneUse())
22487 return SDValue();
22488
22489 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22490 }
22491 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22492 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22493 // =>
22494 // (load $addr+1*size)
22495
22496 // Don't duplicate a load with other uses.
22497 if (!VecOp.hasOneUse())
22498 return SDValue();
22499
22500 // If the bit convert changed the number of elements, it is unsafe
22501 // to examine the mask.
22502 if (BCNumEltsChanged)
22503 return SDValue();
22504
22505 // Select the input vector, guarding against out of range extract vector.
22506 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22507 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22508
22509 if (VecOp.getOpcode() == ISD::BITCAST) {
22510 // Don't duplicate a load with other uses.
22511 if (!VecOp.hasOneUse())
22512 return SDValue();
22513
22514 VecOp = VecOp.getOperand(0);
22515 }
22516 if (ISD::isNormalLoad(VecOp.getNode())) {
22517 LN0 = cast<LoadSDNode>(VecOp);
22518 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22519 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22520 }
22521 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22522 VecVT.getVectorElementType() == ScalarVT &&
22523 (!LegalTypes ||
22524 TLI.isTypeLegal(
22525                  VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22526     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22527 // -> extract_vector_elt a, 0
22528 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22529 // -> extract_vector_elt a, 1
22530 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22531 // -> extract_vector_elt b, 0
22532 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22533 // -> extract_vector_elt b, 1
22534 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22535 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22536 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22537 Index.getValueType());
22538
22539 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22540     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22541                               ConcatVT.getVectorElementType(),
22542 ConcatOp, NewIdx);
22543 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22544 }
22545
22546   // Make sure we found a simple (non-volatile, non-atomic) load and that the
22547   // extractelement is its only use.
22548 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22549 return SDValue();
22550
22551 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22552 if (Elt == -1)
22553 return DAG.getUNDEF(LVT);
22554
22555 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22556}
22557
22558// Simplify (build_vec (ext )) to (bitcast (build_vec ))
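// For example (illustrative, little-endian), with i16 inputs widened to i32:
//   (v4i32 build_vector (zext a), (zext b), (zext c), (zext d))
//     -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))
// For any_extend inputs the filler elements are undef instead of zero.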
22559SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22560 // We perform this optimization post type-legalization because
22561 // the type-legalizer often scalarizes integer-promoted vectors.
22562 // Performing this optimization before may create bit-casts which
22563 // will be type-legalized to complex code sequences.
22564 // We perform this optimization only before the operation legalizer because we
22565 // may introduce illegal operations.
22566 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22567 return SDValue();
22568
22569 unsigned NumInScalars = N->getNumOperands();
22570 SDLoc DL(N);
22571 EVT VT = N->getValueType(0);
22572
22573 // Check to see if this is a BUILD_VECTOR of a bunch of values
22574 // which come from any_extend or zero_extend nodes. If so, we can create
22575 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22576 // optimizations. We do not handle sign-extend because we can't fill the sign
22577 // using shuffles.
22578 EVT SourceType = MVT::Other;
22579 bool AllAnyExt = true;
22580
22581 for (unsigned i = 0; i != NumInScalars; ++i) {
22582 SDValue In = N->getOperand(i);
22583 // Ignore undef inputs.
22584 if (In.isUndef()) continue;
22585
22586 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22587 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22588
22589 // Abort if the element is not an extension.
22590 if (!ZeroExt && !AnyExt) {
22591 SourceType = MVT::Other;
22592 break;
22593 }
22594
22595 // The input is a ZeroExt or AnyExt. Check the original type.
22596 EVT InTy = In.getOperand(0).getValueType();
22597
22598 // Check that all of the widened source types are the same.
22599 if (SourceType == MVT::Other)
22600 // First time.
22601 SourceType = InTy;
22602 else if (InTy != SourceType) {
22603       // Multiple incoming types. Abort.
22604 SourceType = MVT::Other;
22605 break;
22606 }
22607
22608 // Check if all of the extends are ANY_EXTENDs.
22609 AllAnyExt &= AnyExt;
22610 }
22611
22612 // In order to have valid types, all of the inputs must be extended from the
22613 // same source type and all of the inputs must be any or zero extend.
22614 // Scalar sizes must be a power of two.
22615 EVT OutScalarTy = VT.getScalarType();
22616 bool ValidTypes =
22617 SourceType != MVT::Other &&
22618 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22619 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22620
22621 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22622 // turn into a single shuffle instruction.
22623 if (!ValidTypes)
22624 return SDValue();
22625
22626 // If we already have a splat buildvector, then don't fold it if it means
22627 // introducing zeros.
22628 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22629 return SDValue();
22630
22631 bool isLE = DAG.getDataLayout().isLittleEndian();
22632 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22633 assert(ElemRatio > 1 && "Invalid element size ratio");
22634 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22635 DAG.getConstant(0, DL, SourceType);
22636
22637 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22638 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22639
22640 // Populate the new build_vector
22641 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22642 SDValue Cast = N->getOperand(i);
22643 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22644 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22645 Cast.isUndef()) && "Invalid cast opcode");
22646 SDValue In;
22647 if (Cast.isUndef())
22648 In = DAG.getUNDEF(SourceType);
22649 else
22650 In = Cast->getOperand(0);
22651 unsigned Index = isLE ? (i * ElemRatio) :
22652 (i * ElemRatio + (ElemRatio - 1));
22653
22654 assert(Index < Ops.size() && "Invalid index");
22655 Ops[Index] = In;
22656 }
22657
22658 // The type of the new BUILD_VECTOR node.
22659 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22660 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22661 "Invalid vector size");
22662 // Check if the new vector type is legal.
22663 if (!isTypeLegal(VecVT) ||
22664 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22665        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22666     return SDValue();
22667
22668 // Make the new BUILD_VECTOR.
22669 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22670
22671 // The new BUILD_VECTOR node has the potential to be further optimized.
22672 AddToWorklist(BV.getNode());
22673 // Bitcast to the desired type.
22674 return DAG.getBitcast(VT, BV);
22675}
22676
22677// Simplify (build_vec (trunc $1)
22678// (trunc (srl $1 half-width))
22679// (trunc (srl $1 (2 * half-width))))
22680// to (bitcast $1)
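// For example (illustrative, little-endian), with $1 : i64 and a v4i16 result:
//   (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
//                       (trunc (srl $1, 32)), (trunc (srl $1, 48)))
//     -> (v4i16 bitcast $1)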
22681SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22682 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22683
22684 EVT VT = N->getValueType(0);
22685
22686 // Don't run this before LegalizeTypes if VT is legal.
22687 // Targets may have other preferences.
22688 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22689 return SDValue();
22690
22691 // Only for little endian
22692 if (!DAG.getDataLayout().isLittleEndian())
22693 return SDValue();
22694
22695 SDLoc DL(N);
22696 EVT OutScalarTy = VT.getScalarType();
22697 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22698
22699 // Only for power of two types to be sure that bitcast works well
22700 if (!isPowerOf2_64(ScalarTypeBitsize))
22701 return SDValue();
22702
22703 unsigned NumInScalars = N->getNumOperands();
22704
22705 // Look through bitcasts
22706 auto PeekThroughBitcast = [](SDValue Op) {
22707 if (Op.getOpcode() == ISD::BITCAST)
22708 return Op.getOperand(0);
22709 return Op;
22710 };
22711
22712 // The source value where all the parts are extracted.
22713 SDValue Src;
22714 for (unsigned i = 0; i != NumInScalars; ++i) {
22715 SDValue In = PeekThroughBitcast(N->getOperand(i));
22716 // Ignore undef inputs.
22717 if (In.isUndef()) continue;
22718
22719 if (In.getOpcode() != ISD::TRUNCATE)
22720 return SDValue();
22721
22722 In = PeekThroughBitcast(In.getOperand(0));
22723
22724 if (In.getOpcode() != ISD::SRL) {
22725       // For now we only handle build_vec without shuffling; handle shifts here
22726       // in the future.
22727 if (i != 0)
22728 return SDValue();
22729
22730 Src = In;
22731 } else {
22732 // In is SRL
22733 SDValue part = PeekThroughBitcast(In.getOperand(0));
22734
22735 if (!Src) {
22736 Src = part;
22737 } else if (Src != part) {
22738 // Vector parts do not stem from the same variable
22739 return SDValue();
22740 }
22741
22742 SDValue ShiftAmtVal = In.getOperand(1);
22743 if (!isa<ConstantSDNode>(ShiftAmtVal))
22744 return SDValue();
22745
22746 uint64_t ShiftAmt = In.getConstantOperandVal(1);
22747
22748 // The extracted value is not extracted at the right position
22749 if (ShiftAmt != i * ScalarTypeBitsize)
22750 return SDValue();
22751 }
22752 }
22753
22754 // Only cast if the size is the same
22755 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22756 return SDValue();
22757
22758 return DAG.getBitcast(VT, Src);
22759}
22760
22761SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22762 ArrayRef<int> VectorMask,
22763 SDValue VecIn1, SDValue VecIn2,
22764 unsigned LeftIdx, bool DidSplitVec) {
22765 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22766
22767 EVT VT = N->getValueType(0);
22768 EVT InVT1 = VecIn1.getValueType();
22769 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22770
22771 unsigned NumElems = VT.getVectorNumElements();
22772 unsigned ShuffleNumElems = NumElems;
22773
22774 // If we artificially split a vector in two already, then the offsets in the
22775 // operands will all be based off of VecIn1, even those in VecIn2.
22776 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22777
22778 uint64_t VTSize = VT.getFixedSizeInBits();
22779 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22780 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22781
22782 assert(InVT2Size <= InVT1Size &&
22783 "Inputs must be sorted to be in non-increasing vector size order.");
22784
22785 // We can't generate a shuffle node with mismatched input and output types.
22786 // Try to make the types match the type of the output.
22787 if (InVT1 != VT || InVT2 != VT) {
22788 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22789 // If the output vector length is a multiple of both input lengths,
22790 // we can concatenate them and pad the rest with undefs.
22791 unsigned NumConcats = VTSize / InVT1Size;
22792 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22793 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22794 ConcatOps[0] = VecIn1;
22795 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22796 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22797 VecIn2 = SDValue();
22798 } else if (InVT1Size == VTSize * 2) {
22799 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22800 return SDValue();
22801
22802 if (!VecIn2.getNode()) {
22803 // If we only have one input vector, and it's twice the size of the
22804 // output, split it in two.
22805 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22806 DAG.getVectorIdxConstant(NumElems, DL));
22807 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22808 // Since we now have shorter input vectors, adjust the offset of the
22809 // second vector's start.
22810 Vec2Offset = NumElems;
22811 } else {
22812 assert(InVT2Size <= InVT1Size &&
22813 "Second input is not going to be larger than the first one.");
22814
22815 // VecIn1 is wider than the output, and we have another, possibly
22816 // smaller input. Pad the smaller input with undefs, shuffle at the
22817 // input vector width, and extract the output.
22818 // The shuffle type is different than VT, so check legality again.
22819 if (LegalOperations &&
22820             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22821           return SDValue();
22822
22823 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22824 // lower it back into a BUILD_VECTOR. So if the inserted type is
22825 // illegal, don't even try.
22826 if (InVT1 != InVT2) {
22827 if (!TLI.isTypeLegal(InVT2))
22828 return SDValue();
22829 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22830 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22831 }
22832 ShuffleNumElems = NumElems * 2;
22833 }
22834 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22835 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22836 ConcatOps[0] = VecIn2;
22837 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22838 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22839 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22840 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22841 return SDValue();
22842       // If the dest vector has fewer than two elements, then using a shuffle and
22843       // an extract from larger registers will cost even more.
22844 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22845 return SDValue();
22846 assert(InVT2Size <= InVT1Size &&
22847 "Second input is not going to be larger than the first one.");
22848
22849 // VecIn1 is wider than the output, and we have another, possibly
22850 // smaller input. Pad the smaller input with undefs, shuffle at the
22851 // input vector width, and extract the output.
22852 // The shuffle type is different than VT, so check legality again.
22853 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22854 return SDValue();
22855
22856 if (InVT1 != InVT2) {
22857 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22858 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22859 }
22860 ShuffleNumElems = InVT1Size / VTSize * NumElems;
22861 } else {
22862 // TODO: Support cases where the length mismatch isn't exactly by a
22863 // factor of 2.
22864 // TODO: Move this check upwards, so that if we have bad type
22865 // mismatches, we don't create any DAG nodes.
22866 return SDValue();
22867 }
22868 }
22869
22870 // Initialize mask to undef.
22871 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22872
22873 // Only need to run up to the number of elements actually used, not the
22874 // total number of elements in the shuffle - if we are shuffling a wider
22875 // vector, the high lanes should be set to undef.
22876 for (unsigned i = 0; i != NumElems; ++i) {
22877 if (VectorMask[i] <= 0)
22878 continue;
22879
22880 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22881 if (VectorMask[i] == (int)LeftIdx) {
22882 Mask[i] = ExtIndex;
22883 } else if (VectorMask[i] == (int)LeftIdx + 1) {
22884 Mask[i] = Vec2Offset + ExtIndex;
22885 }
22886 }
22887
22888   // The types of the input vectors may have changed above.
22889 InVT1 = VecIn1.getValueType();
22890
22891 // If we already have a VecIn2, it should have the same type as VecIn1.
22892 // If we don't, get an undef/zero vector of the appropriate type.
22893 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22894 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22895
22896 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22897 if (ShuffleNumElems > NumElems)
22898 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22899
22900 return Shuffle;
22901}
22902
22903 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22904   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22905
22906 // First, determine where the build vector is not undef.
22907 // TODO: We could extend this to handle zero elements as well as undefs.
22908 int NumBVOps = BV->getNumOperands();
22909 int ZextElt = -1;
22910 for (int i = 0; i != NumBVOps; ++i) {
22911 SDValue Op = BV->getOperand(i);
22912 if (Op.isUndef())
22913 continue;
22914 if (ZextElt == -1)
22915 ZextElt = i;
22916 else
22917 return SDValue();
22918 }
22919 // Bail out if there's no non-undef element.
22920 if (ZextElt == -1)
22921 return SDValue();
22922
22923 // The build vector contains some number of undef elements and exactly
22924 // one other element. That other element must be a zero-extended scalar
22925 // extracted from a vector at a constant index to turn this into a shuffle.
22926 // Also, require that the build vector does not implicitly truncate/extend
22927 // its elements.
22928 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22929 EVT VT = BV->getValueType(0);
22930 SDValue Zext = BV->getOperand(ZextElt);
22931 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22932       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22933       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22934       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22935     return SDValue();
22936
22937 // The zero-extend must be a multiple of the source size, and we must be
22938 // building a vector of the same size as the source of the extract element.
22939 SDValue Extract = Zext.getOperand(0);
22940 unsigned DestSize = Zext.getValueSizeInBits();
22941 unsigned SrcSize = Extract.getValueSizeInBits();
22942 if (DestSize % SrcSize != 0 ||
22943 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
22944 return SDValue();
22945
22946 // Create a shuffle mask that will combine the extracted element with zeros
22947 // and undefs.
22948 int ZextRatio = DestSize / SrcSize;
22949 int NumMaskElts = NumBVOps * ZextRatio;
22950 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
22951 for (int i = 0; i != NumMaskElts; ++i) {
22952 if (i / ZextRatio == ZextElt) {
22953 // The low bits of the (potentially translated) extracted element map to
22954 // the source vector. The high bits map to zero. We will use a zero vector
22955 // as the 2nd source operand of the shuffle, so use the 1st element of
22956 // that vector (mask value is number-of-elements) for the high bits.
22957 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
22958 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
22959 : NumMaskElts;
22960 }
22961
22962 // Undef elements of the build vector remain undef because we initialize
22963 // the shuffle mask with -1.
22964 }
22965
22966 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
22967 // bitcast (shuffle V, ZeroVec, VectorMask)
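  // For example (illustrative, little-endian), with V : v8i16 and a v4i32 result:
  //   (v4i32 build_vector (i32 zext (i16 extractelt V, 3)), undef, undef, undef)
  //     -> (v4i32 bitcast (v8i16 vector_shuffle V, zero, <3,8,u,u,u,u,u,u>))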
22968 SDLoc DL(BV);
22969 EVT VecVT = Extract.getOperand(0).getValueType();
22970 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
22971 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22972 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
22973 ZeroVec, ShufMask, DAG);
22974 if (!Shuf)
22975 return SDValue();
22976 return DAG.getBitcast(VT, Shuf);
22977}
22978
22979// FIXME: promote to STLExtras.
22980template <typename R, typename T>
22981static auto getFirstIndexOf(R &&Range, const T &Val) {
22982 auto I = find(Range, Val);
22983 if (I == Range.end())
22984 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
22985 return std::distance(Range.begin(), I);
22986}
22987
22988// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
22989// operations. If the types of the vectors we're extracting from allow it,
22990// turn this into a vector_shuffle node.
22991SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
22992 SDLoc DL(N);
22993 EVT VT = N->getValueType(0);
22994
22995 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
22996 if (!isTypeLegal(VT))
22997 return SDValue();
22998
22999   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23000     return V;
23001
23002 // May only combine to shuffle after legalize if shuffle is legal.
23003 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23004 return SDValue();
23005
23006 bool UsesZeroVector = false;
23007 unsigned NumElems = N->getNumOperands();
23008
23009 // Record, for each element of the newly built vector, which input vector
23010 // that element comes from. -1 stands for undef, 0 for the zero vector,
23011 // and positive values for the input vectors.
23012 // VectorMask maps each element to its vector number, and VecIn maps vector
23013 // numbers to their initial SDValues.
23014
23015 SmallVector<int, 8> VectorMask(NumElems, -1);
23016   SmallVector<SDValue, 8> VecIn;
23017   VecIn.push_back(SDValue());
23018
23019 for (unsigned i = 0; i != NumElems; ++i) {
23020 SDValue Op = N->getOperand(i);
23021
23022 if (Op.isUndef())
23023 continue;
23024
23025 // See if we can use a blend with a zero vector.
23026 // TODO: Should we generalize this to a blend with an arbitrary constant
23027 // vector?
23028     if (isNullConstant(Op) || isNullFPConstant(Op)) {
23029       UsesZeroVector = true;
23030 VectorMask[i] = 0;
23031 continue;
23032 }
23033
23034 // Not an undef or zero. If the input is something other than an
23035 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23036 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23037 !isa<ConstantSDNode>(Op.getOperand(1)))
23038 return SDValue();
23039 SDValue ExtractedFromVec = Op.getOperand(0);
23040
23041 if (ExtractedFromVec.getValueType().isScalableVector())
23042 return SDValue();
23043
23044 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23045 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23046 return SDValue();
23047
23048 // All inputs must have the same element type as the output.
23049 if (VT.getVectorElementType() !=
23050 ExtractedFromVec.getValueType().getVectorElementType())
23051 return SDValue();
23052
23053 // Have we seen this input vector before?
23054 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23055 // a map back from SDValues to numbers isn't worth it.
23056 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23057 if (Idx == -1) { // A new source vector?
23058 Idx = VecIn.size();
23059 VecIn.push_back(ExtractedFromVec);
23060 }
23061
23062 VectorMask[i] = Idx;
23063 }
23064
23065 // If we didn't find at least one input vector, bail out.
23066 if (VecIn.size() < 2)
23067 return SDValue();
23068
23069   // If all the operands of the BUILD_VECTOR extract from the same
23070   // vector, then split the vector efficiently based on the maximum
23071   // vector access index and adjust the VectorMask and
23072   // VecIn accordingly.
23073 bool DidSplitVec = false;
23074 if (VecIn.size() == 2) {
23075 unsigned MaxIndex = 0;
23076 unsigned NearestPow2 = 0;
23077 SDValue Vec = VecIn.back();
23078 EVT InVT = Vec.getValueType();
23079 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23080
23081 for (unsigned i = 0; i < NumElems; i++) {
23082 if (VectorMask[i] <= 0)
23083 continue;
23084 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23085 IndexVec[i] = Index;
23086 MaxIndex = std::max(MaxIndex, Index);
23087 }
23088
23089 NearestPow2 = PowerOf2Ceil(MaxIndex);
23090 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23091 NumElems * 2 < NearestPow2) {
23092 unsigned SplitSize = NearestPow2 / 2;
23093 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23094 InVT.getVectorElementType(), SplitSize);
23095 if (TLI.isTypeLegal(SplitVT) &&
23096 SplitSize + SplitVT.getVectorNumElements() <=
23097 InVT.getVectorNumElements()) {
23098 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23099 DAG.getVectorIdxConstant(SplitSize, DL));
23100 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23101 DAG.getVectorIdxConstant(0, DL));
23102 VecIn.pop_back();
23103 VecIn.push_back(VecIn1);
23104 VecIn.push_back(VecIn2);
23105 DidSplitVec = true;
23106
23107 for (unsigned i = 0; i < NumElems; i++) {
23108 if (VectorMask[i] <= 0)
23109 continue;
23110 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23111 }
23112 }
23113 }
23114 }
23115
23116 // Sort input vectors by decreasing vector element count,
23117 // while preserving the relative order of equally-sized vectors.
23118   // Note that we keep the first "implicit zero vector" as-is.
23119 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23120 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23121 [](const SDValue &a, const SDValue &b) {
23122 return a.getValueType().getVectorNumElements() >
23123 b.getValueType().getVectorNumElements();
23124 });
23125
23126 // We now also need to rebuild the VectorMask, because it referenced element
23127 // order in VecIn, and we just sorted them.
23128 for (int &SourceVectorIndex : VectorMask) {
23129 if (SourceVectorIndex <= 0)
23130 continue;
23131 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23132 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23133 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23134 SourceVectorIndex = Idx;
23135 }
23136
23137 VecIn = std::move(SortedVecIn);
23138
23139   // TODO: Should this fire if some of the input vectors have an illegal type (like
23140 // it does now), or should we let legalization run its course first?
23141
23142 // Shuffle phase:
23143 // Take pairs of vectors, and shuffle them so that the result has elements
23144 // from these vectors in the correct places.
23145 // For example, given:
23146 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23147 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23148 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23149 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23150 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23151 // We will generate:
23152 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23153 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23154 SmallVector<SDValue, 4> Shuffles;
23155 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23156 unsigned LeftIdx = 2 * In + 1;
23157 SDValue VecLeft = VecIn[LeftIdx];
23158 SDValue VecRight =
23159 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23160
23161 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23162 VecRight, LeftIdx, DidSplitVec))
23163 Shuffles.push_back(Shuffle);
23164 else
23165 return SDValue();
23166 }
23167
23168 // If we need the zero vector as an "ingredient" in the blend tree, add it
23169 // to the list of shuffles.
23170 if (UsesZeroVector)
23171 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23172 : DAG.getConstantFP(0.0, DL, VT));
23173
23174 // If we only have one shuffle, we're done.
23175 if (Shuffles.size() == 1)
23176 return Shuffles[0];
23177
23178 // Update the vector mask to point to the post-shuffle vectors.
23179 for (int &Vec : VectorMask)
23180 if (Vec == 0)
23181 Vec = Shuffles.size() - 1;
23182 else
23183 Vec = (Vec - 1) / 2;
23184
23185 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23186 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23187 // generate:
23188 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23189 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23190 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23191 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23192 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23193 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23194 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23195
23196 // Make sure the initial size of the shuffle list is even.
23197 if (Shuffles.size() % 2)
23198 Shuffles.push_back(DAG.getUNDEF(VT));
23199
23200 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23201 if (CurSize % 2) {
23202 Shuffles[CurSize] = DAG.getUNDEF(VT);
23203 CurSize++;
23204 }
23205 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23206 int Left = 2 * In;
23207 int Right = 2 * In + 1;
23208 SmallVector<int, 8> Mask(NumElems, -1);
23209 SDValue L = Shuffles[Left];
23210 ArrayRef<int> LMask;
23211 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23212 L.use_empty() && L.getOperand(1).isUndef() &&
23213 L.getOperand(0).getValueType() == L.getValueType();
23214 if (IsLeftShuffle) {
23215 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23216 L = L.getOperand(0);
23217 }
23218 SDValue R = Shuffles[Right];
23219 ArrayRef<int> RMask;
23220 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23221 R.use_empty() && R.getOperand(1).isUndef() &&
23222 R.getOperand(0).getValueType() == R.getValueType();
23223 if (IsRightShuffle) {
23224 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23225 R = R.getOperand(0);
23226 }
23227 for (unsigned I = 0; I != NumElems; ++I) {
23228 if (VectorMask[I] == Left) {
23229 Mask[I] = I;
23230 if (IsLeftShuffle)
23231 Mask[I] = LMask[I];
23232 VectorMask[I] = In;
23233 } else if (VectorMask[I] == Right) {
23234 Mask[I] = I + NumElems;
23235 if (IsRightShuffle)
23236 Mask[I] = RMask[I] + NumElems;
23237 VectorMask[I] = In;
23238 }
23239 }
23240
23241 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23242 }
23243 }
23244 return Shuffles[0];
23245}
23246
23247 // Try to turn a build vector of zero extends of extract vector elts into a
23248 // vector zero extend and possibly an extract subvector.
23249// TODO: Support sign extend?
23250// TODO: Allow undef elements?
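// For example (illustrative), with X : v4i32:
//   (v2i64 build_vector (zext (extractelt X, 2)), (zext (extractelt X, 3)))
//     -> (v2i64 zero_extend (v2i32 extract_subvector X, 2))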
23251SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23252 if (LegalOperations)
23253 return SDValue();
23254
23255 EVT VT = N->getValueType(0);
23256
23257 bool FoundZeroExtend = false;
23258 SDValue Op0 = N->getOperand(0);
23259 auto checkElem = [&](SDValue Op) -> int64_t {
23260 unsigned Opc = Op.getOpcode();
23261 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23262 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23263 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23264 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23265 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23266 return C->getZExtValue();
23267 return -1;
23268 };
23269
23270 // Make sure the first element matches
23271 // (zext (extract_vector_elt X, C))
23272 // Offset must be a constant multiple of the
23273 // known-minimum vector length of the result type.
23274 int64_t Offset = checkElem(Op0);
23275 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23276 return SDValue();
23277
23278 unsigned NumElems = N->getNumOperands();
23279 SDValue In = Op0.getOperand(0).getOperand(0);
23280 EVT InSVT = In.getValueType().getScalarType();
23281 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23282
23283 // Don't create an illegal input type after type legalization.
23284 if (LegalTypes && !TLI.isTypeLegal(InVT))
23285 return SDValue();
23286
23287 // Ensure all the elements come from the same vector and are adjacent.
23288 for (unsigned i = 1; i != NumElems; ++i) {
23289 if ((Offset + i) != checkElem(N->getOperand(i)))
23290 return SDValue();
23291 }
23292
23293 SDLoc DL(N);
23294 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23295 Op0.getOperand(0).getOperand(1));
23296 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23297 VT, In);
23298}
23299
23300 // If this is a very simple BUILD_VECTOR with its first element being a ZERO_EXTEND,
23301 // and all other elements being constant zeros, granularize the BUILD_VECTOR's
23302 // element width, absorbing the ZERO_EXTEND, turning it into a constant zero op.
23303 // This pattern can appear during legalization.
23304 //
23305 // NOTE: This can be generalized to allow more than a single
23306 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
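//
// For example (illustrative, little-endian), with x : i32:
//   (v2i64 build_vector (i64 zero_extend x), (i64 0))
//     -> (v2i64 bitcast (v4i32 build_vector x, 0, 0, 0))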
23307SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23308 // Don't run this after legalization. Targets may have other preferences.
23309 if (Level >= AfterLegalizeDAG)
23310 return SDValue();
23311
23312 // FIXME: support big-endian.
23313 if (DAG.getDataLayout().isBigEndian())
23314 return SDValue();
23315
23316 EVT VT = N->getValueType(0);
23317 EVT OpVT = N->getOperand(0).getValueType();
23318 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23319
23320 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23321
23322 if (!TLI.isTypeLegal(OpIntVT) ||
23323 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23324 return SDValue();
23325
23326 unsigned EltBitwidth = VT.getScalarSizeInBits();
23327 // NOTE: the actual width of operands may be wider than that!
23328
23329 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23330 // active bits they all have? We'll want to truncate them all to that width.
23331 unsigned ActiveBits = 0;
23332 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23333 for (auto I : enumerate(N->ops())) {
23334 SDValue Op = I.value();
23335 // FIXME: support UNDEF elements?
23336 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23337 unsigned OpActiveBits =
23338 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23339 if (OpActiveBits == 0) {
23340 KnownZeroOps.setBit(I.index());
23341 continue;
23342 }
23343 // Profitability check: don't allow non-zero constant operands.
23344 return SDValue();
23345 }
23346 // Profitability check: there must only be a single non-zero operand,
23347 // and it must be the first operand of the BUILD_VECTOR.
23348 if (I.index() != 0)
23349 return SDValue();
23350 // The operand must be a zero-extension itself.
23351 // FIXME: this could be generalized to known leading zeros check.
23352 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23353 return SDValue();
23354 unsigned CurrActiveBits =
23355 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23356 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23357 ActiveBits = CurrActiveBits;
23358 // We want to at least halve the element size.
23359 if (2 * ActiveBits > EltBitwidth)
23360 return SDValue();
23361 }
23362
23363 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23364 if (ActiveBits == 0)
23365 return SDValue();
23366
23367   // We have EltBitwidth bits; the *minimal* chunk size is ActiveBits.
23368   // Into how many chunks can we split our element width?
23369 EVT NewScalarIntVT, NewIntVT;
23370 std::optional<unsigned> Factor;
23371 // We can split the element into at least two chunks, but not into more
23372   // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23373   // that evenly divides the element width and for which the resulting
23374   // types/operations on that chunk width are legal.
23375 assert(2 * ActiveBits <= EltBitwidth &&
23376 "We know that half or less bits of the element are active.");
23377 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23378 if (EltBitwidth % Scale != 0)
23379 continue;
23380 unsigned ChunkBitwidth = EltBitwidth / Scale;
23381 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23382 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23383 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23384 Scale * N->getNumOperands());
23385 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23386 (LegalOperations &&
23387 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23388            TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23389       continue;
23390 Factor = Scale;
23391 break;
23392 }
23393 if (!Factor)
23394 return SDValue();
23395
23396 SDLoc DL(N);
23397 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23398
23399 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23400   SmallVector<SDValue, 16> NewOps;
23401   NewOps.reserve(NewIntVT.getVectorNumElements());
23402 for (auto I : enumerate(N->ops())) {
23403 SDValue Op = I.value();
23404 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23405 unsigned SrcOpIdx = I.index();
23406 if (KnownZeroOps[SrcOpIdx]) {
23407 NewOps.append(*Factor, ZeroOp);
23408 continue;
23409 }
23410 Op = DAG.getBitcast(OpIntVT, Op);
23411 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23412 NewOps.emplace_back(Op);
23413 NewOps.append(*Factor - 1, ZeroOp);
23414 }
23415 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23416 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23417 NewBV = DAG.getBitcast(VT, NewBV);
23418 return NewBV;
23419}
23420
23421SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23422 EVT VT = N->getValueType(0);
23423
23424 // A vector built entirely of undefs is undef.
23425   if (ISD::allOperandsUndef(N))
23426     return DAG.getUNDEF(VT);
23427
23428 // If this is a splat of a bitcast from another vector, change to a
23429 // concat_vector.
23430 // For example:
23431 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23432 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23433 //
23434 // If X is a build_vector itself, the concat can become a larger build_vector.
23435 // TODO: Maybe this is useful for non-splat too?
23436 if (!LegalOperations) {
23437 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23438 // Only change build_vector to a concat_vector if the splat value type is
23439 // same as the vector element type.
23440 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23441       Splat = peekThroughBitcasts(Splat);
23442       EVT SrcVT = Splat.getValueType();
23443 if (SrcVT.isVector()) {
23444 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23445 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23446 SrcVT.getVectorElementType(), NumElts);
23447 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23448 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23449 SDValue Concat =
23450 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23451 return DAG.getBitcast(VT, Concat);
23452 }
23453 }
23454 }
23455 }
23456
23457 // Check if we can express BUILD VECTOR via subvector extract.
23458 if (!LegalTypes && (N->getNumOperands() > 1)) {
23459 SDValue Op0 = N->getOperand(0);
23460 auto checkElem = [&](SDValue Op) -> uint64_t {
23461 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23462 (Op0.getOperand(0) == Op.getOperand(0)))
23463 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23464 return CNode->getZExtValue();
23465 return -1;
23466 };
23467
23468 int Offset = checkElem(Op0);
23469 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23470 if (Offset + i != checkElem(N->getOperand(i))) {
23471 Offset = -1;
23472 break;
23473 }
23474 }
23475
23476 if ((Offset == 0) &&
23477 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23478 return Op0.getOperand(0);
23479 if ((Offset != -1) &&
23480 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23481 0)) // IDX must be multiple of output size.
23482 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23483 Op0.getOperand(0), Op0.getOperand(1));
23484 }
23485
23486 if (SDValue V = convertBuildVecZextToZext(N))
23487 return V;
23488
23489 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23490 return V;
23491
23492 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23493 return V;
23494
23495 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23496 return V;
23497
23498 if (SDValue V = reduceBuildVecToShuffle(N))
23499 return V;
23500
23501 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23502 // Do this late as some of the above may replace the splat.
23503   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) == TargetLowering::Legal)
23504     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23505 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23506 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23507 }
23508
23509 return SDValue();
23510}
23511
23512 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23513   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23514 EVT OpVT = N->getOperand(0).getValueType();
23515
23516 // If the operands are legal vectors, leave them alone.
23517 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23518 return SDValue();
23519
23520 SDLoc DL(N);
23521 EVT VT = N->getValueType(0);
23522   SmallVector<SDValue, 8> Ops;
23523   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23524
23525 // Keep track of what we encounter.
23526 bool AnyInteger = false;
23527 bool AnyFP = false;
23528 for (const SDValue &Op : N->ops()) {
23529 if (ISD::BITCAST == Op.getOpcode() &&
23530 !Op.getOperand(0).getValueType().isVector())
23531 Ops.push_back(Op.getOperand(0));
23532 else if (ISD::UNDEF == Op.getOpcode())
23533 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23534 else
23535 return SDValue();
23536
23537 // Note whether we encounter an integer or floating point scalar.
23538 // If it's neither, bail out, it could be something weird like x86mmx.
23539 EVT LastOpVT = Ops.back().getValueType();
23540 if (LastOpVT.isFloatingPoint())
23541 AnyFP = true;
23542 else if (LastOpVT.isInteger())
23543 AnyInteger = true;
23544 else
23545 return SDValue();
23546 }
23547
23548 // If any of the operands is a floating point scalar bitcast to a vector,
23549 // use floating point types throughout, and bitcast everything.
23550 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23551 if (AnyFP) {
23552     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23553     if (AnyInteger) {
23554 for (SDValue &Op : Ops) {
23555 if (Op.getValueType() == SVT)
23556 continue;
23557 if (Op.isUndef())
23558 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23559 else
23560 Op = DAG.getBitcast(SVT, Op);
23561 }
23562 }
23563 }
23564
23565 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23566 VT.getSizeInBits() / SVT.getSizeInBits());
23567 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23568}
23569
23570// Attempt to merge nested concat_vectors/undefs.
23571// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23572// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23573 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23574                                          SelectionDAG &DAG) {
23575 EVT VT = N->getValueType(0);
23576
23577 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23578 EVT SubVT;
23579 SDValue FirstConcat;
23580 for (const SDValue &Op : N->ops()) {
23581 if (Op.isUndef())
23582 continue;
23583 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23584 return SDValue();
23585 if (!FirstConcat) {
23586 SubVT = Op.getOperand(0).getValueType();
23587 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23588 return SDValue();
23589 FirstConcat = Op;
23590 continue;
23591 }
23592 if (SubVT != Op.getOperand(0).getValueType())
23593 return SDValue();
23594 }
23595 assert(FirstConcat && "Concat of all-undefs found");
23596
23597 SmallVector<SDValue> ConcatOps;
23598 for (const SDValue &Op : N->ops()) {
23599 if (Op.isUndef()) {
23600 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23601 continue;
23602 }
23603 ConcatOps.append(Op->op_begin(), Op->op_end());
23604 }
23605 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23606}
23607
23608// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23609// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23610// most two distinct vectors the same size as the result, attempt to turn this
23611// into a legal shuffle.
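// For example (illustrative), with A, B : v4i32:
//   (v4i32 concat_vectors (v2i32 extract_subvector A, 2),
//                         (v2i32 extract_subvector B, 0))
//     -> (v4i32 vector_shuffle A, B, <2,3,4,5>)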
23612 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23613   EVT VT = N->getValueType(0);
23614 EVT OpVT = N->getOperand(0).getValueType();
23615
23616 // We currently can't generate an appropriate shuffle for a scalable vector.
23617 if (VT.isScalableVector())
23618 return SDValue();
23619
23620 int NumElts = VT.getVectorNumElements();
23621 int NumOpElts = OpVT.getVectorNumElements();
23622
23623 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23624   SmallVector<int, 8> Mask;
23625
23626 for (SDValue Op : N->ops()) {
23627     Op = peekThroughBitcasts(Op);
23628
23629 // UNDEF nodes convert to UNDEF shuffle mask values.
23630 if (Op.isUndef()) {
23631 Mask.append((unsigned)NumOpElts, -1);
23632 continue;
23633 }
23634
23635 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23636 return SDValue();
23637
23638 // What vector are we extracting the subvector from and at what index?
23639 SDValue ExtVec = Op.getOperand(0);
23640 int ExtIdx = Op.getConstantOperandVal(1);
23641
23642 // We want the EVT of the original extraction to correctly scale the
23643 // extraction index.
23644 EVT ExtVT = ExtVec.getValueType();
23645 ExtVec = peekThroughBitcasts(ExtVec);
23646
23647 // UNDEF nodes convert to UNDEF shuffle mask values.
23648 if (ExtVec.isUndef()) {
23649 Mask.append((unsigned)NumOpElts, -1);
23650 continue;
23651 }
23652
23653 // Ensure that we are extracting a subvector from a vector the same
23654 // size as the result.
23655 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23656 return SDValue();
23657
23658 // Scale the subvector index to account for any bitcast.
23659 int NumExtElts = ExtVT.getVectorNumElements();
23660 if (0 == (NumExtElts % NumElts))
23661 ExtIdx /= (NumExtElts / NumElts);
23662 else if (0 == (NumElts % NumExtElts))
23663 ExtIdx *= (NumElts / NumExtElts);
23664 else
23665 return SDValue();
23666
23667 // At most we can reference 2 inputs in the final shuffle.
23668 if (SV0.isUndef() || SV0 == ExtVec) {
23669 SV0 = ExtVec;
23670 for (int i = 0; i != NumOpElts; ++i)
23671 Mask.push_back(i + ExtIdx);
23672 } else if (SV1.isUndef() || SV1 == ExtVec) {
23673 SV1 = ExtVec;
23674 for (int i = 0; i != NumOpElts; ++i)
23675 Mask.push_back(i + ExtIdx + NumElts);
23676 } else {
23677 return SDValue();
23678 }
23679 }
23680
23681 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23682 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23683 DAG.getBitcast(VT, SV1), Mask, DAG);
23684}
23685
23686 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23687   unsigned CastOpcode = N->getOperand(0).getOpcode();
23688 switch (CastOpcode) {
23689 case ISD::SINT_TO_FP:
23690 case ISD::UINT_TO_FP:
23691 case ISD::FP_TO_SINT:
23692 case ISD::FP_TO_UINT:
23693 // TODO: Allow more opcodes?
23694 // case ISD::BITCAST:
23695 // case ISD::TRUNCATE:
23696 // case ISD::ZERO_EXTEND:
23697 // case ISD::SIGN_EXTEND:
23698 // case ISD::FP_EXTEND:
23699 break;
23700 default:
23701 return SDValue();
23702 }
23703
23704 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23705 if (!SrcVT.isVector())
23706 return SDValue();
23707
23708 // All operands of the concat must be the same kind of cast from the same
23709 // source type.
23710   SmallVector<SDValue, 4> SrcOps;
23711   for (SDValue Op : N->ops()) {
23712 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23713 Op.getOperand(0).getValueType() != SrcVT)
23714 return SDValue();
23715 SrcOps.push_back(Op.getOperand(0));
23716 }
23717
23718 // The wider cast must be supported by the target. This is unusual because
23719 // the operation support type parameter depends on the opcode. In addition,
23720 // check the other type in the cast to make sure this is really legal.
23721 EVT VT = N->getValueType(0);
23722 EVT SrcEltVT = SrcVT.getVectorElementType();
23723 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23724 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23725 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23726 switch (CastOpcode) {
23727 case ISD::SINT_TO_FP:
23728 case ISD::UINT_TO_FP:
23729 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23730 !TLI.isTypeLegal(VT))
23731 return SDValue();
23732 break;
23733 case ISD::FP_TO_SINT:
23734 case ISD::FP_TO_UINT:
23735 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23736 !TLI.isTypeLegal(ConcatSrcVT))
23737 return SDValue();
23738 break;
23739 default:
23740 llvm_unreachable("Unexpected cast opcode");
23741 }
23742
23743 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
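  // For example (illustrative):
  //   (v8f32 concat_vectors (v4f32 sint_to_fp X), (v4f32 sint_to_fp Y))
  //     -> (v8f32 sint_to_fp (v8i32 concat_vectors X, Y))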
23744 SDLoc DL(N);
23745 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23746 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23747}
23748
23749// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23750// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23751// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
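// For example (illustrative), with A : v2i32:
//   (v4i32 concat_vectors (v2i32 vector_shuffle<1,0> A, undef), A)
//     -> (v4i32 vector_shuffle<1,0,0,1> (concat_vectors A, undef), undef)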
23752 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23753     SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23754 bool LegalOperations) {
23755 EVT VT = N->getValueType(0);
23756 EVT OpVT = N->getOperand(0).getValueType();
23757 if (VT.isScalableVector())
23758 return SDValue();
23759
23760 // For now, only allow simple 2-operand concatenations.
23761 if (N->getNumOperands() != 2)
23762 return SDValue();
23763
23764 // Don't create illegal types/shuffles when not allowed to.
23765 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23766 (LegalOperations &&
23767        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23768     return SDValue();
23769
23770 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23771 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23772 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23773 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23774 // (4) and for now, the SHUFFLE_VECTOR must be unary.
23775 ShuffleVectorSDNode *SVN = nullptr;
23776 for (SDValue Op : N->ops()) {
23777 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23778 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23779 all_of(N->ops(), [CurSVN](SDValue Op) {
23780 // FIXME: can we allow UNDEF operands?
23781 return !Op.isUndef() &&
23782 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23783 })) {
23784 SVN = CurSVN;
23785 break;
23786 }
23787 }
23788 if (!SVN)
23789 return SDValue();
23790
23791   // We are going to pad the shuffle operands, so any index that was picking
23792   // from the second operand must be adjusted.
23793 SmallVector<int, 16> AdjustedMask;
23794 AdjustedMask.reserve(SVN->getMask().size());
23795 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23796 append_range(AdjustedMask, SVN->getMask());
23797
23798 // Identity masks for the operands of the (padded) shuffle.
23799 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23800 MutableArrayRef<int> FirstShufOpIdentityMask =
23801 MutableArrayRef<int>(IdentityMask)
23802           .take_front(OpVT.getVectorNumElements());
23803   MutableArrayRef<int> SecondShufOpIdentityMask =
23804       MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23805   std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23806 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23807             VT.getVectorNumElements());
23808
23809 // New combined shuffle mask.
23810   SmallVector<int, 32> Mask;
23811   Mask.reserve(VT.getVectorNumElements());
23812 for (SDValue Op : N->ops()) {
23813 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23814 if (Op.getNode() == SVN) {
23815 append_range(Mask, AdjustedMask);
23816 continue;
23817 }
23818 if (Op == SVN->getOperand(0)) {
23819 append_range(Mask, FirstShufOpIdentityMask);
23820 continue;
23821 }
23822 if (Op == SVN->getOperand(1)) {
23823 append_range(Mask, SecondShufOpIdentityMask);
23824 continue;
23825 }
23826 llvm_unreachable("Unexpected operand!");
23827 }
23828
23829 // Don't create illegal shuffle masks.
23830 if (!TLI.isShuffleMaskLegal(Mask, VT))
23831 return SDValue();
23832
23833 // Pad the shuffle operands with UNDEF.
23834 SDLoc dl(N);
23835 std::array<SDValue, 2> ShufOps;
23836 for (auto I : zip(SVN->ops(), ShufOps)) {
23837 SDValue ShufOp = std::get<0>(I);
23838 SDValue &NewShufOp = std::get<1>(I);
23839 if (ShufOp.isUndef())
23840 NewShufOp = DAG.getUNDEF(VT);
23841 else {
23842 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23843 DAG.getUNDEF(OpVT));
23844 ShufOpParts[0] = ShufOp;
23845 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23846 }
23847 }
23848 // Finally, create the new wide shuffle.
23849 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23850}
23851
23852SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23853 // If we only have one input vector, we don't need to do any concatenation.
23854 if (N->getNumOperands() == 1)
23855 return N->getOperand(0);
23856
23857 // Check if all of the operands are undefs.
23858 EVT VT = N->getValueType(0);
23859   if (ISD::allOperandsUndef(N))
23860     return DAG.getUNDEF(VT);
23861
23862 // Optimize concat_vectors where all but the first of the vectors are undef.
23863 if (all_of(drop_begin(N->ops()),
23864 [](const SDValue &Op) { return Op.isUndef(); })) {
23865 SDValue In = N->getOperand(0);
23866 assert(In.getValueType().isVector() && "Must concat vectors");
23867
23868 // If the input is a concat_vectors, just make a larger concat by padding
23869 // with smaller undefs.
23870 //
23871 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
23872 // here could cause an infinite loop. That legalizing happens when LegalDAG
23873 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
23874 // scalable.
23875 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23876 !(LegalDAG && In.getValueType().isScalableVector())) {
23877 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23878 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23879 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23880 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23881 }
23882
23883     SDValue Scalar = peekThroughOneUseBitcasts(In);
23884
23885 // concat_vectors(scalar_to_vector(scalar), undef) ->
23886 // scalar_to_vector(scalar)
23887 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23888 Scalar.hasOneUse()) {
23889 EVT SVT = Scalar.getValueType().getVectorElementType();
23890 if (SVT == Scalar.getOperand(0).getValueType())
23891 Scalar = Scalar.getOperand(0);
23892 }
23893
23894 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23895 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
23896 // If the bitcast type isn't legal, it might be a trunc of a legal type;
23897 // look through the trunc so we can still do the transform:
23898 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23899 if (Scalar->getOpcode() == ISD::TRUNCATE &&
23900 !TLI.isTypeLegal(Scalar.getValueType()) &&
23901 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23902 Scalar = Scalar->getOperand(0);
23903
23904 EVT SclTy = Scalar.getValueType();
23905
23906 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23907 return SDValue();
23908
23909 // Bail out if the vector size is not a multiple of the scalar size.
23910 if (VT.getSizeInBits() % SclTy.getSizeInBits())
23911 return SDValue();
23912
23913 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23914 if (VNTNumElms < 2)
23915 return SDValue();
23916
23917 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23918 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23919 return SDValue();
23920
23921 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23922 return DAG.getBitcast(VT, Res);
23923 }
23924 }
23925
23926 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
23927 // We have already tested above for an UNDEF only concatenation.
23928 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23929 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
23930 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23931 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23932 };
23933 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23934     SmallVector<SDValue, 8> Opnds;
23935     EVT SVT = VT.getScalarType();
23936
23937 EVT MinVT = SVT;
23938 if (!SVT.isFloatingPoint()) {
23939       // If the BUILD_VECTORs are built from integers, they may have different
23940       // operand types. Get the smallest type and truncate all operands to it.
23941 bool FoundMinVT = false;
23942 for (const SDValue &Op : N->ops())
23943 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23944 EVT OpSVT = Op.getOperand(0).getValueType();
23945 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
23946 FoundMinVT = true;
23947 }
23948 assert(FoundMinVT && "Concat vector type mismatch");
23949 }
23950
23951 for (const SDValue &Op : N->ops()) {
23952 EVT OpVT = Op.getValueType();
23953 unsigned NumElts = OpVT.getVectorNumElements();
23954
23955 if (ISD::UNDEF == Op.getOpcode())
23956 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
23957
23958 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23959 if (SVT.isFloatingPoint()) {
23960 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
23961 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
23962 } else {
23963 for (unsigned i = 0; i != NumElts; ++i)
23964 Opnds.push_back(
23965 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
23966 }
23967 }
23968 }
23969
23970 assert(VT.getVectorNumElements() == Opnds.size() &&
23971 "Concat vector type mismatch");
23972 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
23973 }
23974
23975 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
23976 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
23977 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
23978 return V;
23979
23980 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
23981 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
23982 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
23983 return V;
23984
23985 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
23986 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
23987 return V;
23988 }
23989
23990 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
23991 return V;
23992
23993 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
23994 N, DAG, TLI, LegalTypes, LegalOperations))
23995 return V;
23996
23997 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
23998 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
23999 // operands and look for CONCAT operations that place the incoming vectors
24000 // at the exact same location.
24001 //
24002 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
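// For example, concat (extract_subvector X, 0), (extract_subvector X, 4),
// where X and the concat result are both v8i32, folds to X itself.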
24003 SDValue SingleSource = SDValue();
24004 unsigned PartNumElem =
24005 N->getOperand(0).getValueType().getVectorMinNumElements();
24006
24007 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24008 SDValue Op = N->getOperand(i);
24009
24010 if (Op.isUndef())
24011 continue;
24012
24013 // Check if this is the identity extract:
24014 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24015 return SDValue();
24016
24017 // Find the single incoming vector for the extract_subvector.
24018 if (SingleSource.getNode()) {
24019 if (Op.getOperand(0) != SingleSource)
24020 return SDValue();
24021 } else {
24022 SingleSource = Op.getOperand(0);
24023
24024 // Check the source type is the same as the type of the result.
24025 // If not, this concat may extend the vector, so we cannot
24026 // optimize it away.
24027 if (SingleSource.getValueType() != N->getValueType(0))
24028 return SDValue();
24029 }
24030
24031 // Check that we are reading from the identity index.
24032 unsigned IdentityIndex = i * PartNumElem;
24033 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24034 return SDValue();
24035 }
24036
24037 if (SingleSource.getNode())
24038 return SingleSource;
24039
24040 return SDValue();
24041}
24042
24043 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to determine
24044 // whether the subvector can be sourced for free.
24045 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24046 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24047 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24048 return V.getOperand(1);
24049 }
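// Otherwise, if V concatenates SubVT-sized pieces and Index is aligned to a
// piece boundary, the demanded subvector is one of the concat operands.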
24050 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24051 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24052 V.getOperand(0).getValueType() == SubVT &&
24053 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24054 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24055 return V.getOperand(SubIdx);
24056 }
24057 return SDValue();
24058}
24059
24060 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24061 SelectionDAG &DAG,
24062 bool LegalOperations) {
24063 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24064 SDValue BinOp = Extract->getOperand(0);
24065 unsigned BinOpcode = BinOp.getOpcode();
24066 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24067 return SDValue();
24068
24069 EVT VecVT = BinOp.getValueType();
24070 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24071 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24072 return SDValue();
24073
24074 SDValue Index = Extract->getOperand(1);
24075 EVT SubVT = Extract->getValueType(0);
24076 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24077 return SDValue();
24078
24079 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24080 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24081
24082 // TODO: We could handle the case where only 1 operand is being inserted by
24083 // creating an extract of the other operand, but that requires checking
24084 // number of uses and/or costs.
24085 if (!Sub0 || !Sub1)
24086 return SDValue();
24087
24088 // We are inserting both operands of the wide binop only to extract back
24089 // to the narrow vector size. Eliminate all of the insert/extract:
24090 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24091 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24092 BinOp->getFlags());
24093}
24094
24095/// If we are extracting a subvector produced by a wide binary operator try
24096/// to use a narrow binary operator and/or avoid concatenation and extraction.
24097 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24098 bool LegalOperations) {
24099 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24100 // some of these bailouts with other transforms.
24101
24102 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24103 return V;
24104
24105 // The extract index must be a constant, so we can map it to a concat operand.
24106 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24107 if (!ExtractIndexC)
24108 return SDValue();
24109
24110 // We are looking for an optionally bitcasted wide vector binary operator
24111 // feeding an extract subvector.
24112 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24113 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24114 unsigned BOpcode = BinOp.getOpcode();
24115 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24116 return SDValue();
24117
24118 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24119 // reduced to the unary fneg when it is visited, and we probably want to deal
24120 // with fneg in a target-specific way.
24121 if (BOpcode == ISD::FSUB) {
24122 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24123 if (C && C->getValueAPF().isNegZero())
24124 return SDValue();
24125 }
24126
24127 // The binop must be a vector type, so we can extract some fraction of it.
24128 EVT WideBVT = BinOp.getValueType();
24129 // The optimisations below currently assume we are dealing with fixed length
24130 // vectors. It is possible to add support for scalable vectors, but at the
24131 // moment we've done no analysis to prove whether they are profitable or not.
24132 if (!WideBVT.isFixedLengthVector())
24133 return SDValue();
24134
24135 EVT VT = Extract->getValueType(0);
24136 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24137 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24138 "Extract index is not a multiple of the vector length.");
24139
24140 // Bail out if this is not a proper multiple width extraction.
24141 unsigned WideWidth = WideBVT.getSizeInBits();
24142 unsigned NarrowWidth = VT.getSizeInBits();
24143 if (WideWidth % NarrowWidth != 0)
24144 return SDValue();
24145
24146 // Bail out if we are extracting a fraction of a single operation. This can
24147 // occur because we potentially looked through a bitcast of the binop.
24148 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24149 unsigned WideNumElts = WideBVT.getVectorNumElements();
24150 if (WideNumElts % NarrowingRatio != 0)
24151 return SDValue();
24152
24153 // Bail out if the target does not support a narrower version of the binop.
24154 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24155 WideNumElts / NarrowingRatio);
24156 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24157 LegalOperations))
24158 return SDValue();
24159
24160 // If extraction is cheap, we don't need to look at the binop operands
24161 // for concat ops. The narrow binop alone makes this transform profitable.
24162 // We can't just reuse the original extract index operand because we may have
24163 // bitcasted.
24164 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24165 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24166 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24167 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24168 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24169 SDLoc DL(Extract);
24170 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24171 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24172 BinOp.getOperand(0), NewExtIndex);
24173 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24174 BinOp.getOperand(1), NewExtIndex);
24175 SDValue NarrowBinOp =
24176 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24177 return DAG.getBitcast(VT, NarrowBinOp);
24178 }
24179
24180 // Only handle the case where we are doubling and then halving. A larger ratio
24181 // may require more than two narrow binops to replace the wide binop.
24182 if (NarrowingRatio != 2)
24183 return SDValue();
24184
24185 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24186 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24187 // flavors, but no other 256-bit integer support. This could be extended to
24188 // handle any binop, but that may require fixing/adding other folds to avoid
24189 // codegen regressions.
24190 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24191 return SDValue();
24192
24193 // We need at least one concatenation operation of a binop operand to make
24194 // this transform worthwhile. The concat must double the input vector sizes.
24195 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24196 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24197 return V.getOperand(ConcatOpNum);
24198 return SDValue();
24199 };
24200 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24201 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24202
24203 if (SubVecL || SubVecR) {
24204 // If a binop operand was not the result of a concat, we must extract a
24205 // half-sized operand for our new narrow binop:
24206 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24207 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24208 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24209 SDLoc DL(Extract);
24210 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24211 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24212 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24213 BinOp.getOperand(0), IndexC);
24214
24215 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24216 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24217 BinOp.getOperand(1), IndexC);
24218
24219 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24220 return DAG.getBitcast(VT, NarrowBinOp);
24221 }
24222
24223 return SDValue();
24224}
24225
24226/// If we are extracting a subvector from a wide vector load, convert to a
24227/// narrow load to eliminate the extraction:
24228/// (extract_subvector (load wide vector)) --> (load narrow vector)
24229 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24230 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24231 if (DAG.getDataLayout().isBigEndian())
24232 return SDValue();
24233
24234 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24235 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24236 return SDValue();
24237
24238 // Allow targets to opt-out.
24239 EVT VT = Extract->getValueType(0);
24240
24241 // We can only create byte sized loads.
24242 if (!VT.isByteSized())
24243 return SDValue();
24244
24245 unsigned Index = Extract->getConstantOperandVal(1);
24246 unsigned NumElts = VT.getVectorMinNumElements();
24247 // A fixed length vector being extracted from a scalable vector
24248 // may not be any *smaller* than the scalable one.
24249 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24250 return SDValue();
24251
24252 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24253 // multiple of the minimum number of elements in the result type.
24254 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24255 "multiple of the result's element count");
24256
24257 // It's fine to use TypeSize here as we know the offset will not be negative.
24258 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24259
24260 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24261 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24262 return SDValue();
24263
24264 // The narrow load will be offset from the base address of the old load if
24265 // we are extracting from something besides index 0 (little-endian).
24266 SDLoc DL(Extract);
24267
24268 // TODO: Use "BaseIndexOffset" to make this more effective.
24269 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24270
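// Build the memory operand for the narrow load. A scalable offset cannot be
// folded into the original pointer info, so the pointer info is rebuilt for
// that case.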
24273 MachineMemOperand *MMO;
24274 if (Offset.isScalable()) {
24275 MachinePointerInfo MPI =
24276 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24277 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24278 } else
24279 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24280 StoreSize);
24281
24282 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24283 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24284 return NewLd;
24285}
24286
24287/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24288/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24289/// EXTRACT_SUBVECTOR(Op?, ?),
24290/// Mask'))
24291/// iff it is legal and profitable to do so. Notably, the trimmed mask
24292/// (containing only the elements that are extracted)
24293/// must reference at most two subvectors.
24294 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24295 SelectionDAG &DAG,
24296 const TargetLowering &TLI,
24297 bool LegalOperations) {
24298 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24299 "Must only be called on EXTRACT_SUBVECTOR's");
24300
24301 SDValue N0 = N->getOperand(0);
24302
24303 // Only deal with non-scalable vectors.
24304 EVT NarrowVT = N->getValueType(0);
24305 EVT WideVT = N0.getValueType();
24306 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24307 return SDValue();
24308
24309 // The operand must be a shufflevector.
24310 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24311 if (!WideShuffleVector)
24312 return SDValue();
24313
24314 // The old shuffle needs to go away.
24315 if (!WideShuffleVector->hasOneUse())
24316 return SDValue();
24317
24318 // And the narrow shufflevector that we'll form must be legal.
24319 if (LegalOperations &&
24320 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24321 return SDValue();
24322
24323 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24324 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24325 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24326 "Extract index is not a multiple of the output vector length.");
24327
24328 int WideNumElts = WideVT.getVectorNumElements();
24329
24330 SmallVector<int, 16> NewMask;
24331 NewMask.reserve(NumEltsExtracted);
24332 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24333 DemandedSubvectors;
24334
24335 // Try to decode the wide mask into narrow mask from at most two subvectors.
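// For example, extracting the low v4i32 of a v8i32 shuffle of A and B whose
// mask slice is <0,9,2,11> demands subvectors (A,0) and (B,0) and decodes to
// the narrow mask <0,5,2,7>.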
24336 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24337 NumEltsExtracted)) {
24338 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24339 "Out-of-bounds shuffle mask?");
24340
24341 if (M < 0) {
24342 // Does not depend on operands, does not require adjustment.
24343 NewMask.emplace_back(M);
24344 continue;
24345 }
24346
24347 // From which operand of the shuffle does this shuffle mask element pick?
24348 int WideShufOpIdx = M / WideNumElts;
24349 // Which element of that operand is picked?
24350 int OpEltIdx = M % WideNumElts;
24351
24352 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24353 "Shuffle mask vector decomposition failure.");
24354
24355 // And which NumEltsExtracted-sized subvector of that operand is that?
24356 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24357 // And which element within that subvector of that operand is that?
24358 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24359
24360 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24361 "Shuffle mask subvector decomposition failure.");
24362
24363 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24364 WideShufOpIdx * WideNumElts) == M &&
24365 "Shuffle mask full decomposition failure.");
24366
24367 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24368
24369 if (Op.isUndef()) {
24370 // Picking from an undef operand. Let's adjust mask instead.
24371 NewMask.emplace_back(-1);
24372 continue;
24373 }
24374
24375 const std::pair<SDValue, int> DemandedSubvector =
24376 std::make_pair(Op, OpSubvecIdx);
24377
24378 if (DemandedSubvectors.insert(DemandedSubvector)) {
24379 if (DemandedSubvectors.size() > 2)
24380 return SDValue(); // We can't handle more than two subvectors.
24381 // How many elements into the WideVT does this subvector start?
24382 int Index = NumEltsExtracted * OpSubvecIdx;
24383 // Bail out if the extraction isn't going to be cheap.
24384 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24385 return SDValue();
24386 }
24387
24388 // Ok, but from which operand of the new shuffle will this element pick?
24389 int NewOpIdx =
24390 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24391 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24392
24393 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24394 NewMask.emplace_back(AdjM);
24395 }
24396 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24397 assert(DemandedSubvectors.size() <= 2 &&
24398 "Should have ended up demanding at most two subvectors.");
24399
24400 // Did we discover that the shuffle does not actually depend on operands?
24401 if (DemandedSubvectors.empty())
24402 return DAG.getUNDEF(NarrowVT);
24403
24404 // Profitability check: only deal with extractions from the first subvector
24405 // unless the mask becomes an identity mask.
24406 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24407 any_of(NewMask, [](int M) { return M < 0; }))
24408 for (auto &DemandedSubvector : DemandedSubvectors)
24409 if (DemandedSubvector.second != 0)
24410 return SDValue();
24411
24412 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24413 // operand[s]/index[es], so there is no point in checking for its legality.
24414
24415 // Do not turn a legal shuffle into an illegal one.
24416 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24417 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24418 return SDValue();
24419
24420 SDLoc DL(N);
24421
24422 SmallVector<SDValue, 2> NewOps;
24423 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24424 &DemandedSubvector : DemandedSubvectors) {
24425 // How many elements into the WideVT does this subvector start?
24426 int Index = NumEltsExtracted * DemandedSubvector.second;
24427 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24428 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24429 DemandedSubvector.first, IndexC));
24430 }
24431 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24432 "Should end up with either one or two ops");
24433
24434 // If we ended up with only one operand, pad with an undef.
24435 if (NewOps.size() == 1)
24436 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24437
24438 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24439}
24440
24441SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24442 EVT NVT = N->getValueType(0);
24443 SDValue V = N->getOperand(0);
24444 uint64_t ExtIdx = N->getConstantOperandVal(1);
24445 SDLoc DL(N);
24446
24447 // Extract from UNDEF is UNDEF.
24448 if (V.isUndef())
24449 return DAG.getUNDEF(NVT);
24450
24451 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24452 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24453 return NarrowLoad;
24454
24455 // Combine an extract of an extract into a single extract_subvector.
24456 // ext (ext X, C), 0 --> ext X, C
24457 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24458 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24459 V.getConstantOperandVal(1)) &&
24461 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24462 V.getOperand(1));
24463 }
24464 }
24465
24466 // ty1 extract_subvector(ty2 splat(V)) -> ty1 splat(V)
24467 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24468 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24469 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24470 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24471
24472 // extract_subvector(insert_subvector(x,y,c1),c2)
24473 // --> extract_subvector(y,c2-c1)
24474 // iff we're just extracting from the inserted subvector.
24475 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24476 SDValue InsSub = V.getOperand(1);
24477 EVT InsSubVT = InsSub.getValueType();
24478 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24479 unsigned InsIdx = V.getConstantOperandVal(2);
24480 unsigned NumSubElts = NVT.getVectorMinNumElements();
24481 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24482 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24483 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24484 V.getValueType().isFixedLengthVector())
24485 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24486 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24487 }
24488
24489 // Try to move vector bitcast after extract_subv by scaling extraction index:
24490 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
24491 if (V.getOpcode() == ISD::BITCAST &&
24492 V.getOperand(0).getValueType().isVector() &&
24493 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24494 SDValue SrcOp = V.getOperand(0);
24495 EVT SrcVT = SrcOp.getValueType();
24496 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24497 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24498 if ((SrcNumElts % DestNumElts) == 0) {
24499 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24500 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24501 EVT NewExtVT =
24502 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24503 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24504 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24505 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24506 V.getOperand(0), NewIndex);
24507 return DAG.getBitcast(NVT, NewExtract);
24508 }
24509 }
24510 if ((DestNumElts % SrcNumElts) == 0) {
24511 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24512 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24513 ElementCount NewExtEC =
24514 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24515 EVT ScalarVT = SrcVT.getScalarType();
24516 if ((ExtIdx % DestSrcRatio) == 0) {
24517 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24518 EVT NewExtVT =
24519 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24520 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24521 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24522 SDValue NewExtract =
24523 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24524 V.getOperand(0), NewIndex);
24525 return DAG.getBitcast(NVT, NewExtract);
24526 }
24527 if (NewExtEC.isScalar() &&
24528 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24529 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24530 SDValue NewExtract =
24531 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24532 V.getOperand(0), NewIndex);
24533 return DAG.getBitcast(NVT, NewExtract);
24534 }
24535 }
24536 }
24537 }
24538 }
24539
24540 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24541 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24542 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24543 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24544 "Concat and extract subvector do not change element type");
24545 assert((ExtIdx % ExtNumElts) == 0 &&
24546 "Extract index is not a multiple of the input vector length.");
24547
24548 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24549 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24550
24551 // If the concatenated source types match this extract, it's a direct
24552 // simplification:
24553 // extract_subvec (concat V1, V2, ...), i --> Vi
24554 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24555 return V.getOperand(ConcatOpIdx);
24556
24557 // If the concatenated source vectors are a multiple length of this extract,
24558 // then extract a fraction of one of those source vectors directly from a
24559 // concat operand. Example:
24560 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24561 // v2i8 extract_subvec v8i8 Y, 6
24562 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24563 ConcatSrcNumElts % ExtNumElts == 0) {
24564 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24565 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24566 "Trying to extract from >1 concat operand?");
24567 assert(NewExtIdx % ExtNumElts == 0 &&
24568 "Extract index is not a multiple of the input vector length.");
24569 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24570 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24571 V.getOperand(ConcatOpIdx), NewIndexC);
24572 }
24573 }
24574
24575 if (SDValue V =
24576 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24577 return V;
24578
24579 V = peekThroughBitcasts(V);
24580
24581 // If the input is a build vector, try to make a smaller build vector.
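// e.g. extracting v2i32 at index 2 from a v4i32 BUILD_VECTOR becomes a v2i32
// BUILD_VECTOR of operands 2 and 3.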
24582 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24583 EVT InVT = V.getValueType();
24584 unsigned ExtractSize = NVT.getSizeInBits();
24585 unsigned EltSize = InVT.getScalarSizeInBits();
24586 // Only do this if we won't split any elements.
24587 if (ExtractSize % EltSize == 0) {
24588 unsigned NumElems = ExtractSize / EltSize;
24589 EVT EltVT = InVT.getVectorElementType();
24590 EVT ExtractVT =
24591 NumElems == 1 ? EltVT
24592 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24593 if ((Level < AfterLegalizeDAG ||
24594 (NumElems == 1 ||
24595 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24596 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24597 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24598
24599 if (NumElems == 1) {
24600 SDValue Src = V->getOperand(IdxVal);
24601 if (EltVT != Src.getValueType())
24602 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24603 return DAG.getBitcast(NVT, Src);
24604 }
24605
24606 // Extract the pieces from the original build_vector.
24607 SDValue BuildVec =
24608 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24609 return DAG.getBitcast(NVT, BuildVec);
24610 }
24611 }
24612 }
24613
24614 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24615 // Handle only simple case where vector being inserted and vector
24616 // being extracted are of same size.
24617 EVT SmallVT = V.getOperand(1).getValueType();
24618 if (!NVT.bitsEq(SmallVT))
24619 return SDValue();
24620
24621 // Combine:
24622 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24623 // Into:
24624 // indices are equal or bit offsets are equal => V2
24625 // otherwise => (extract_subvec V1, ExtIdx)
24626 uint64_t InsIdx = V.getConstantOperandVal(2);
24627 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24628 ExtIdx * NVT.getScalarSizeInBits()) {
24629 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24630 return SDValue();
24631
24632 return DAG.getBitcast(NVT, V.getOperand(1));
24633 }
24634 return DAG.getNode(
24635 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24636 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24637 N->getOperand(1));
24638 }
24639
24640 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24641 return NarrowBOp;
24642
24643 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24644 return SDValue(N, 0);
24645
24646 return SDValue();
24647}
24648
24649/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24650/// followed by concatenation. Narrow vector ops may have better performance
24651/// than wide ops, and this can unlock further narrowing of other vector ops.
24652/// Targets can invert this transform later if it is not profitable.
24653 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24654 SelectionDAG &DAG) {
24655 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24656 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24657 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24658 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24659 return SDValue();
24660
24661 // Split the wide shuffle mask into halves. Any mask element that is accessing
24662 // operand 1 is offset down to account for narrowing of the vectors.
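// e.g. for operands concat(X, undef) and concat(Y, undef) with v4i32 X and Y,
// the wide mask <0,1,8,9,2,3,10,11> splits into the v4i32 half masks
// <0,1,4,5> and <2,3,6,7>.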
24663 ArrayRef<int> Mask = Shuf->getMask();
24664 EVT VT = Shuf->getValueType(0);
24665 unsigned NumElts = VT.getVectorNumElements();
24666 unsigned HalfNumElts = NumElts / 2;
24667 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24668 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24669 for (unsigned i = 0; i != NumElts; ++i) {
24670 if (Mask[i] == -1)
24671 continue;
24672 // If we reference the upper (undef) subvector then the element is undef.
24673 if ((Mask[i] % NumElts) >= HalfNumElts)
24674 continue;
24675 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24676 if (i < HalfNumElts)
24677 Mask0[i] = M;
24678 else
24679 Mask1[i - HalfNumElts] = M;
24680 }
24681
24682 // Ask the target if this is a valid transform.
24683 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24684 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24685 HalfNumElts);
24686 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24687 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24688 return SDValue();
24689
24690 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24691 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24692 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24693 SDLoc DL(Shuf);
24694 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24695 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24696 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24697}
24698
24699// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24700 // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
24701 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24702 EVT VT = N->getValueType(0);
24703 unsigned NumElts = VT.getVectorNumElements();
24704
24705 SDValue N0 = N->getOperand(0);
24706 SDValue N1 = N->getOperand(1);
24707 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24708 ArrayRef<int> Mask = SVN->getMask();
24709
24710 SmallVector<SDValue, 4> Ops;
24711 EVT ConcatVT = N0.getOperand(0).getValueType();
24712 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24713 unsigned NumConcats = NumElts / NumElemsPerConcat;
24714
24715 auto IsUndefMaskElt = [](int i) { return i == -1; };
24716
24717 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24718 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24719 // half vector elements.
24720 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24721 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24722 IsUndefMaskElt)) {
24723 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24724 N0.getOperand(1),
24725 Mask.slice(0, NumElemsPerConcat));
24726 N1 = DAG.getUNDEF(ConcatVT);
24727 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24728 }
24729
24730 // Look at every vector that's inserted. We're looking for exact
24731 // subvector-sized copies from a concatenated vector
24732 for (unsigned I = 0; I != NumConcats; ++I) {
24733 unsigned Begin = I * NumElemsPerConcat;
24734 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24735
24736 // Make sure we're dealing with a copy.
24737 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24738 Ops.push_back(DAG.getUNDEF(ConcatVT));
24739 continue;
24740 }
24741
24742 int OpIdx = -1;
24743 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24744 if (IsUndefMaskElt(SubMask[i]))
24745 continue;
24746 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24747 return SDValue();
24748 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24749 if (0 <= OpIdx && EltOpIdx != OpIdx)
24750 return SDValue();
24751 OpIdx = EltOpIdx;
24752 }
24753 assert(0 <= OpIdx && "Unknown concat_vectors op");
24754
24755 if (OpIdx < (int)N0.getNumOperands())
24756 Ops.push_back(N0.getOperand(OpIdx));
24757 else
24758 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24759 }
24760
24761 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24762}
24763
24764// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24765// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24766//
24767// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24768// a simplification in some sense, but it isn't appropriate in general: some
24769// BUILD_VECTORs are substantially cheaper than others. The general case
24770// of a BUILD_VECTOR requires inserting each element individually (or
24771// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24772// all constants is a single constant pool load. A BUILD_VECTOR where each
24773// element is identical is a splat. A BUILD_VECTOR where most of the operands
24774// are undef lowers to a small number of element insertions.
24775//
24776// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24777// We don't fold shuffles where one side is a non-zero constant, and we don't
24778// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24779// non-constant operands. This seems to work out reasonably well in practice.
24780 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24781 SelectionDAG &DAG,
24782 const TargetLowering &TLI) {
24783 EVT VT = SVN->getValueType(0);
24784 unsigned NumElts = VT.getVectorNumElements();
24785 SDValue N0 = SVN->getOperand(0);
24786 SDValue N1 = SVN->getOperand(1);
24787
24788 if (!N0->hasOneUse())
24789 return SDValue();
24790
24791 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
24792 // discussed above.
24793 if (!N1.isUndef()) {
24794 if (!N1->hasOneUse())
24795 return SDValue();
24796
24797 bool N0AnyConst = isAnyConstantBuildVector(N0);
24798 bool N1AnyConst = isAnyConstantBuildVector(N1);
24799 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24800 return SDValue();
24801 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24802 return SDValue();
24803 }
24804
24805 // If both inputs are splats of the same value then we can safely merge this
24806 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24807 bool IsSplat = false;
24808 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24809 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24810 if (BV0 && BV1)
24811 if (SDValue Splat0 = BV0->getSplatValue())
24812 IsSplat = (Splat0 == BV1->getSplatValue());
24813
24814 SmallVector<SDValue, 8> Ops;
24815 SmallSet<SDValue, 16> DuplicateOps;
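// Walk the shuffle mask, picking each scalar from the corresponding
// BUILD_VECTOR or SCALAR_TO_VECTOR operand (or undef for sentinel indices).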
24816 for (int M : SVN->getMask()) {
24817 SDValue Op = DAG.getUNDEF(VT.getScalarType());
24818 if (M >= 0) {
24819 int Idx = M < (int)NumElts ? M : M - NumElts;
24820 SDValue &S = (M < (int)NumElts ? N0 : N1);
24821 if (S.getOpcode() == ISD::BUILD_VECTOR) {
24822 Op = S.getOperand(Idx);
24823 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24824 SDValue Op0 = S.getOperand(0);
24825 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24826 } else {
24827 // Operand can't be combined - bail out.
24828 return SDValue();
24829 }
24830 }
24831
24832 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24833 // generating a splat; semantically, this is fine, but it's likely to
24834 // generate low-quality code if the target can't reconstruct an appropriate
24835 // shuffle.
24836 if (!Op.isUndef() && !isIntOrFPConstant(Op))
24837 if (!IsSplat && !DuplicateOps.insert(Op).second)
24838 return SDValue();
24839
24840 Ops.push_back(Op);
24841 }
24842
24843 // BUILD_VECTOR requires all inputs to be of the same type, find the
24844 // maximum type and extend them all.
24845 EVT SVT = VT.getScalarType();
24846 if (SVT.isInteger())
24847 for (SDValue &Op : Ops)
24848 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24849 if (SVT != VT.getScalarType())
24850 for (SDValue &Op : Ops)
24851 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24852 : (TLI.isZExtFree(Op.getValueType(), SVT)
24853 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24854 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24855 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24856}
24857
24858// Match shuffles that can be converted to *_vector_extend_in_reg.
24859// This is often generated during legalization.
24860// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24861// and returns the EVT to which the extension should be performed.
24862// NOTE: this assumes that the src is the first operand of the shuffle.
24863 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24864 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24865 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24866 bool LegalOperations) {
24867 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24868
24869 // TODO Add support for big-endian when we have a test case.
24870 if (!VT.isInteger() || IsBigEndian)
24871 return std::nullopt;
24872
24873 unsigned NumElts = VT.getVectorNumElements();
24874 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24875
24876 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
24877 // power-of-2 extensions as they are the most likely.
24878 // FIXME: should try Scale == NumElts case too,
24879 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24880 // The vector width must be a multiple of Scale.
24881 if (NumElts % Scale != 0)
24882 continue;
24883
24884 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24885 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24886
24887 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24888 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24889 continue;
24890
24891 if (Match(Scale))
24892 return OutVT;
24893 }
24894
24895 return std::nullopt;
24896}
24897
24898// Match shuffles that can be converted to any_vector_extend_in_reg.
24899// This is often generated during legalization.
24900// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24901 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24902 SelectionDAG &DAG,
24903 const TargetLowering &TLI,
24904 bool LegalOperations) {
24905 EVT VT = SVN->getValueType(0);
24906 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24907
24908 // TODO Add support for big-endian when we have a test case.
24909 if (!VT.isInteger() || IsBigEndian)
24910 return SDValue();
24911
24912 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
24913 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24914 Mask = SVN->getMask()](unsigned Scale) {
24915 for (unsigned i = 0; i != NumElts; ++i) {
24916 if (Mask[i] < 0)
24917 continue;
24918 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24919 continue;
24920 return false;
24921 }
24922 return true;
24923 };
24924
24925 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24926 SDValue N0 = SVN->getOperand(0);
24927 // Never create an illegal type. Only create unsupported operations if we
24928 // are pre-legalization.
24929 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24930 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24931 if (!OutVT)
24932 return SDValue();
24933 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24934}
24935
24936// Match shuffles that can be converted to zero_extend_vector_inreg.
24937// This is often generated during legalization.
24938// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
24939 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
24940 SelectionDAG &DAG,
24941 const TargetLowering &TLI,
24942 bool LegalOperations) {
24943 bool LegalTypes = true;
24944 EVT VT = SVN->getValueType(0);
24945 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
24946 unsigned NumElts = VT.getVectorNumElements();
24947 unsigned EltSizeInBits = VT.getScalarSizeInBits();
24948
24949 // TODO: add support for big-endian when we have a test case.
24950 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24951 if (!VT.isInteger() || IsBigEndian)
24952 return SDValue();
24953
24954 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
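// Visit each non-sentinel mask index (by reference, so it can be rewritten)
// together with the operand it selects from and the element position within
// that operand.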
24955 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
24956 for (int &Indice : Mask) {
24957 if (Indice < 0)
24958 continue;
24959 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
24960 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
24961 Fn(Indice, OpIdx, OpEltIdx);
24962 }
24963 };
24964
24965 // Which elements of which operand does this shuffle demand?
24966 std::array<APInt, 2> OpsDemandedElts;
24967 for (APInt &OpDemandedElts : OpsDemandedElts)
24968 OpDemandedElts = APInt::getZero(NumElts);
24969 ForEachDecomposedIndice(
24970 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
24971 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
24972 });
24973
24974 // Element-wise(!), which of these demanded elements are known to be zero?
24975 std::array<APInt, 2> OpsKnownZeroElts;
24976 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
24977 std::get<2>(I) =
24978 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
24979
24980 // Manifest zeroable element knowledge in the shuffle mask.
24981 // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG,
24982 // this is a local invention, but it won't leak into the DAG.
24983 // FIXME: should we not manifest them, but just check when matching?
24984 bool HadZeroableElts = false;
24985 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
24986 int &Indice, int OpIdx, int OpEltIdx) {
24987 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
24988 Indice = -2; // Zeroable element.
24989 HadZeroableElts = true;
24990 }
24991 });
24992
24993 // Don't proceed unless we've refined at least one zeroable mask index.
24994 // If we didn't, then we are still trying to match the same shuffle mask
24995 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
24996 // and evidently failed. Proceeding will lead to endless combine loops.
24997 if (!HadZeroableElts)
24998 return SDValue();
24999
25000 // The shuffle may be more fine-grained than we want. Widen elements first.
25001 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25002 SmallVector<int, 16> ScaledMask;
25003 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25004 assert(Mask.size() >= ScaledMask.size() &&
25005 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25006 int Prescale = Mask.size() / ScaledMask.size();
25007
25008 NumElts = ScaledMask.size();
25009 EltSizeInBits *= Prescale;
25010
25011 EVT PrescaledVT = EVT::getVectorVT(
25012 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25013 NumElts);
25014
25015 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25016 return SDValue();
25017
25018 // For example,
25019 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25020 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25021 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25022 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25023 "Unexpected mask scaling factor.");
25024 ArrayRef<int> Mask = ScaledMask;
25025 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25026 SrcElt != NumSrcElts; ++SrcElt) {
25027 // Analyze the shuffle mask in Scale-sized chunks.
25028 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25029 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25030 Mask = Mask.drop_front(MaskChunk.size());
25031 // The first index in this chunk must be SrcElt, but not zero!
25032 // FIXME: undef should be fine, but that results in a more-defined result.
25033 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25034 return false;
25035 // The rest of the indices in this chunk must be zeros.
25036 // FIXME: undef should be fine, but that results in a more-defined result.
25037 if (!all_of(MaskChunk.drop_front(1),
25038 [](int Indice) { return Indice == -2; }))
25039 return false;
25040 }
25041 assert(Mask.empty() && "Did not process the whole mask?");
25042 return true;
25043 };
25044
25045 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25046 for (bool Commuted : {false, true}) {
25047 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25048 if (Commuted)
25049 ShuffleVectorSDNode::commuteMask(ScaledMask);
25050 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25051 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25052 LegalOperations);
25053 if (OutVT)
25054 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25055 DAG.getBitcast(PrescaledVT, Op)));
25056 }
25057 return SDValue();
25058}
25059
25060// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25061// each source element of a large type into the lowest elements of a smaller
25062// destination type. This is often generated during legalization.
25063// If the source node itself was a '*_extend_vector_inreg' node then we should
25064// then be able to remove it.
25065 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25066 SelectionDAG &DAG) {
25067 EVT VT = SVN->getValueType(0);
25068 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25069
25070 // TODO Add support for big-endian when we have a test case.
25071 if (!VT.isInteger() || IsBigEndian)
25072 return SDValue();
25073
25074 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25075
25076 unsigned Opcode = N0.getOpcode();
25077 if (!ISD::isExtVecInRegOpcode(Opcode))
25078 return SDValue();
25079
25080 SDValue N00 = N0.getOperand(0);
25081 ArrayRef<int> Mask = SVN->getMask();
25082 unsigned NumElts = VT.getVectorNumElements();
25083 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25084 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25085 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25086
25087 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25088 return SDValue();
25089 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25090
25091 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
25092 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25093 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25094 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25095 for (unsigned i = 0; i != NumElts; ++i) {
25096 if (Mask[i] < 0)
25097 continue;
25098 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25099 continue;
25100 return false;
25101 }
25102 return true;
25103 };
25104
25105 // At the moment we just handle the case where we've truncated back to the
25106 // same size as before the extension.
25107 // TODO: handle more extension/truncation cases as cases arise.
25108 if (EltSizeInBits != ExtSrcSizeInBits)
25109 return SDValue();
25110
25111 // We can remove *extend_vector_inreg only if the truncation happens at
25112 // the same scale as the extension.
25113 if (isTruncate(ExtScale))
25114 return DAG.getBitcast(VT, N00);
25115
25116 return SDValue();
25117}
25118
25119// Combine shuffles of splat-shuffles of the form:
25120// shuffle (shuffle V, undef, splat-mask), undef, M
25121// If splat-mask contains undef elements, we need to be careful about
25122 // introducing undefs in the folded mask which are not the result of composing
25123// the masks of the shuffles.
25124 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25125 SelectionDAG &DAG) {
25126 EVT VT = Shuf->getValueType(0);
25127 unsigned NumElts = VT.getVectorNumElements();
25128
25129 if (!Shuf->getOperand(1).isUndef())
25130 return SDValue();
25131
25132 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25133 // in disguise, with all demanded elements being identical.
25134 // FIXME: this can be done per-operand.
25135 if (!Shuf->isSplat()) {
25136 APInt DemandedElts(NumElts, 0);
25137 for (int Idx : Shuf->getMask()) {
25138 if (Idx < 0)
25139 continue; // Ignore sentinel indices.
25140 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25141 DemandedElts.setBit(Idx);
25142 }
25143 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25144 APInt UndefElts;
25145 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25146 // Even if all demanded elements are splat, some of them could be undef.
25147 // Which lowest demanded element is *not* known-undef?
25148 std::optional<unsigned> MinNonUndefIdx;
25149 for (int Idx : Shuf->getMask()) {
25150 if (Idx < 0 || UndefElts[Idx])
25151 continue; // Ignore sentinel indices, and undef elements.
25152 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25153 }
25154 if (!MinNonUndefIdx)
25155 return DAG.getUNDEF(VT); // All undef - result is undef.
25156 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25157 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25158 Shuf->getMask().end());
25159 for (int &Idx : SplatMask) {
25160 if (Idx < 0)
25161 continue; // Passthrough sentinel indices.
25162 // Otherwise, just pick the lowest demanded non-undef element.
25163 // Or sentinel undef, if we know we'd pick a known-undef element.
25164 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25165 }
25166 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25167 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25168 Shuf->getOperand(1), SplatMask);
25169 }
25170 }
25171
25172 // If the inner operand is a known splat with no undefs, just return that directly.
25173 // TODO: Create DemandedElts mask from Shuf's mask.
25174 // TODO: Allow undef elements and merge with the shuffle code below.
25175 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25176 return Shuf->getOperand(0);
25177
25178 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25179 if (!Splat || !Splat->isSplat())
25180 return SDValue();
25181
25182 ArrayRef<int> ShufMask = Shuf->getMask();
25183 ArrayRef<int> SplatMask = Splat->getMask();
25184 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25185
25186 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25187 // every undef mask element in the splat-shuffle has a corresponding undef
25188 // element in the user-shuffle's mask or if the composition of mask elements
25189 // would result in undef.
25190 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25191 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25192 // In this case it is not legal to simplify to the splat-shuffle because we
25193 // may be exposing to the users of the shuffle an undef element at index 1
25194 // which was not there before the combine.
25195 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25196 // In this case the composition of masks yields SplatMask, so it's ok to
25197 // simplify to the splat-shuffle.
25198 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25199 // In this case the composed mask includes all undef elements of SplatMask
25200 // and in addition sets element zero to undef. It is safe to simplify to
25201 // the splat-shuffle.
25202 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25203 ArrayRef<int> SplatMask) {
25204 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25205 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25206 SplatMask[UserMask[i]] != -1)
25207 return false;
25208 return true;
25209 };
25210 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25211 return Shuf->getOperand(0);
25212
25213 // Create a new shuffle with a mask that is composed of the two shuffles'
25214 // masks.
25215 SmallVector<int, 32> NewMask;
25216 for (int Idx : ShufMask)
25217 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25218
25219 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25220 Splat->getOperand(0), Splat->getOperand(1),
25221 NewMask);
25222}
25223
25224// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25225// the mask can be treated as a larger type.
25226 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25227 SelectionDAG &DAG,
25228 const TargetLowering &TLI,
25229 bool LegalOperations) {
25230 SDValue Op0 = SVN->getOperand(0);
25231 SDValue Op1 = SVN->getOperand(1);
25232 EVT VT = SVN->getValueType(0);
25233 if (Op0.getOpcode() != ISD::BITCAST)
25234 return SDValue();
25235 EVT InVT = Op0.getOperand(0).getValueType();
25236 if (!InVT.isVector() ||
25237 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25238 Op1.getOperand(0).getValueType() != InVT)))
25239 return SDValue();
25240 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25241 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25242 return SDValue();
25243
25244 int VTLanes = VT.getVectorNumElements();
25245 int InLanes = InVT.getVectorNumElements();
25246 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25247 (LegalOperations &&
25249 return SDValue();
25250 int Factor = VTLanes / InLanes;
25251
25252 // Check that each group of lanes in the mask are either undef or make a valid
25253 // mask for the wider lane type.
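// e.g. with Factor == 2, the v8i16 mask <0,1,4,5,6,7,u,u> widens to the
// v4i32 mask <0,2,3,u>.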
25254 ArrayRef<int> Mask = SVN->getMask();
25255 SmallVector<int> NewMask;
25256 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25257 return SDValue();
25258
25259 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25260 return SDValue();
25261
25262 // Create the new shuffle with the new mask and bitcast it back to the
25263 // original type.
25264 SDLoc DL(SVN);
25265 Op0 = Op0.getOperand(0);
25266 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25267 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25268 return DAG.getBitcast(VT, NewShuf);
25269}
25270
25271/// Combine shuffle of shuffle of the form:
25272/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
25273 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25274 SelectionDAG &DAG) {
25275 if (!OuterShuf->getOperand(1).isUndef())
25276 return SDValue();
25277 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25278 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25279 return SDValue();
25280
25281 ArrayRef<int> OuterMask = OuterShuf->getMask();
25282 ArrayRef<int> InnerMask = InnerShuf->getMask();
25283 unsigned NumElts = OuterMask.size();
25284 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25285 SmallVector<int, 32> CombinedMask(NumElts, -1);
25286 int SplatIndex = -1;
25287 for (unsigned i = 0; i != NumElts; ++i) {
25288 // Undef lanes remain undef.
25289 int OuterMaskElt = OuterMask[i];
25290 if (OuterMaskElt == -1)
25291 continue;
25292
25293 // Peek through the shuffle masks to get the underlying source element.
25294 int InnerMaskElt = InnerMask[OuterMaskElt];
25295 if (InnerMaskElt == -1)
25296 continue;
25297
25298 // Initialize the splatted element.
25299 if (SplatIndex == -1)
25300 SplatIndex = InnerMaskElt;
25301
25302 // Non-matching index - this is not a splat.
25303 if (SplatIndex != InnerMaskElt)
25304 return SDValue();
25305
25306 CombinedMask[i] = InnerMaskElt;
25307 }
25308 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25309 getSplatIndex(CombinedMask) != -1) &&
25310 "Expected a splat mask");
25311
25312 // TODO: The transform may be a win even if the mask is not legal.
25313 EVT VT = OuterShuf->getValueType(0);
25314 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25315 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25316 return SDValue();
25317
25318 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25319 InnerShuf->getOperand(1), CombinedMask);
25320}
25321
25322/// If the shuffle mask is taking exactly one element from the first vector
25323/// operand and passing through all other elements from the second vector
25324/// operand, return the index of the mask element that is choosing an element
25325/// from the first operand. Otherwise, return -1.
25326 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25327 int MaskSize = Mask.size();
25328 int EltFromOp0 = -1;
25329 // TODO: This does not match if there are undef elements in the shuffle mask.
25330 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25331 // removing an instruction (a shuffle), but losing the knowledge that some
25332 // vector lanes are not needed.
25333 for (int i = 0; i != MaskSize; ++i) {
25334 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25335 // We're looking for a shuffle of exactly one element from operand 0.
25336 if (EltFromOp0 != -1)
25337 return -1;
25338 EltFromOp0 = i;
25339 } else if (Mask[i] != i + MaskSize) {
25340 // Nothing from operand 1 can change lanes.
25341 return -1;
25342 }
25343 }
25344 return EltFromOp0;
25345}
25346
25347/// If a shuffle inserts exactly one element from a source vector operand into
25348/// another vector operand and we can access the specified element as a scalar,
25349/// then we can eliminate the shuffle.
25350static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25351 SelectionDAG &DAG) {
25352 // First, check if we are taking one element of a vector and shuffling that
25353 // element into another vector.
25354 ArrayRef<int> Mask = Shuf->getMask();
25355 SmallVector<int, 16> CommutedMask(Mask);
25356 SDValue Op0 = Shuf->getOperand(0);
25357 SDValue Op1 = Shuf->getOperand(1);
25358 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25359 if (ShufOp0Index == -1) {
25360 // Commute mask and check again.
25361 ShuffleVectorSDNode::commuteShuffleMask(CommutedMask, Mask.size());
25362 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25363 if (ShufOp0Index == -1)
25364 return SDValue();
25365 // Commute operands to match the commuted shuffle mask.
25366 std::swap(Op0, Op1);
25367 Mask = CommutedMask;
25368 }
25369
25370 // The shuffle inserts exactly one element from operand 0 into operand 1.
25371 // Now see if we can access that element as a scalar via a real insert element
25372 // instruction.
25373 // TODO: We can try harder to locate the element as a scalar. Examples: it
25374 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25375 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25376 "Shuffle mask value must be from operand 0");
25377 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25378 return SDValue();
25379
25380 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25381 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25382 return SDValue();
25383
25384 // There's an existing insertelement with constant insertion index, so we
25385 // don't need to check the legality/profitability of a replacement operation
25386 // that differs at most in the constant value. The target should be able to
25387 // lower any of those in a similar way. If not, legalization will expand this
25388 // to a scalar-to-vector plus shuffle.
25389 //
25390 // Note that the shuffle may move the scalar from the position that the insert
25391 // element used. Therefore, our new insert element occurs at the shuffle's
25392 // mask index value, not the insert's index value.
25393 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
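// e.g. v4i32: shuffle (insertelt v1, x, 0), v2, <4,5,0,7> --> insertelt v2, x, 2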
25394 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25395 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25396 Op1, Op0.getOperand(1), NewInsIndex);
25397}
25398
25399/// If we have a unary shuffle of a shuffle, see if it can be folded away
25400/// completely. This has the potential to lose undef knowledge because the first
25401/// shuffle may not have an undef mask element where the second one does. So
25402/// only call this after doing simplifications based on demanded elements.
25403static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25404 // shuf (shuf0 X, Y, Mask0), undef, Mask
25405 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25406 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25407 return SDValue();
25408
25409 ArrayRef<int> Mask = Shuf->getMask();
25410 ArrayRef<int> Mask0 = Shuf0->getMask();
25411 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25412 // Ignore undef elements.
25413 if (Mask[i] == -1)
25414 continue;
25415 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25416
25417 // Is the element of the shuffle operand chosen by this shuffle the same as
25418 // the element chosen by the shuffle operand itself?
25419 if (Mask0[Mask[i]] != Mask0[i])
25420 return SDValue();
25421 }
25422 // Every element of this shuffle is identical to the result of the previous
25423 // shuffle, so we can replace this value.
25424 return Shuf->getOperand(0);
25425}
25426
25427SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25428 EVT VT = N->getValueType(0);
25429 unsigned NumElts = VT.getVectorNumElements();
25430
25431 SDValue N0 = N->getOperand(0);
25432 SDValue N1 = N->getOperand(1);
25433
25434 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25435
25436 // Canonicalize shuffle undef, undef -> undef
25437 if (N0.isUndef() && N1.isUndef())
25438 return DAG.getUNDEF(VT);
25439
25440 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25441
25442 // Canonicalize shuffle v, v -> v, undef
25443 if (N0 == N1)
25444 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25445 createUnaryMask(SVN->getMask(), NumElts));
25446
25447 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25448 if (N0.isUndef())
25449 return DAG.getCommutedVectorShuffle(*SVN);
25450
25451 // Remove references to rhs if it is undef
25452 if (N1.isUndef()) {
25453 bool Changed = false;
25454 SmallVector<int, 8> NewMask;
25455 for (unsigned i = 0; i != NumElts; ++i) {
25456 int Idx = SVN->getMaskElt(i);
25457 if (Idx >= (int)NumElts) {
25458 Idx = -1;
25459 Changed = true;
25460 }
25461 NewMask.push_back(Idx);
25462 }
25463 if (Changed)
25464 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25465 }
25466
25467 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25468 return InsElt;
25469
25470 // A shuffle of a single vector that is a splatted value can always be folded.
25471 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25472 return V;
25473
25474 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25475 return V;
25476
25477 // If it is a splat, check if the argument vector is another splat or a
25478 // build_vector.
25479 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25480 int SplatIndex = SVN->getSplatIndex();
25481 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25482 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25483 // splat (vector_bo L, R), Index -->
25484 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25485 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25486 SDLoc DL(N);
25487 EVT EltVT = VT.getScalarType();
25488 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25489 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25490 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25491 SDValue NewBO =
25492 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25493 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25494 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25495 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25496 }
25497
25498 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25499 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25500 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25501 N0.hasOneUse()) {
25502 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25503 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25504
25505 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25506 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25507 if (Idx->getAPIntValue() == SplatIndex)
25508 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25509
25510 // Look through a bitcast if LE and splatting lane 0, through to a
25511 // scalar_to_vector or a build_vector.
25512 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25513 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25514 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25515 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25516 EVT N00VT = N0.getOperand(0).getValueType();
25517 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25518 VT.isInteger() && N00VT.isInteger()) {
25519 EVT InVT =
25520 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25521 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25522 SDLoc(N), InVT);
25523 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25524 }
25525 }
25526 }
25527
25528 // If this is a bit convert that changes the element type of the vector but
25529 // not the number of vector elements, look through it. Be careful not to
25530 // look through conversions that change things like v4f32 to v2f64.
25531 SDNode *V = N0.getNode();
25532 if (V->getOpcode() == ISD::BITCAST) {
25533 SDValue ConvInput = V->getOperand(0);
25534 if (ConvInput.getValueType().isVector() &&
25535 ConvInput.getValueType().getVectorNumElements() == NumElts)
25536 V = ConvInput.getNode();
25537 }
25538
25539 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25540 assert(V->getNumOperands() == NumElts &&
25541 "BUILD_VECTOR has wrong number of operands");
25542 SDValue Base;
25543 bool AllSame = true;
25544 for (unsigned i = 0; i != NumElts; ++i) {
25545 if (!V->getOperand(i).isUndef()) {
25546 Base = V->getOperand(i);
25547 break;
25548 }
25549 }
25550 // Splat of <u, u, u, u>, return <u, u, u, u>
25551 if (!Base.getNode())
25552 return N0;
25553 for (unsigned i = 0; i != NumElts; ++i) {
25554 if (V->getOperand(i) != Base) {
25555 AllSame = false;
25556 break;
25557 }
25558 }
25559 // Splat of <x, x, x, x>, return <x, x, x, x>
25560 if (AllSame)
25561 return N0;
25562
25563 // Canonicalize any other splat as a build_vector.
25564 SDValue Splatted = V->getOperand(SplatIndex);
25565 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25566 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25567
25568 // We may have jumped through bitcasts, so the type of the
25569 // BUILD_VECTOR may not match the type of the shuffle.
25570 if (V->getValueType(0) != VT)
25571 NewBV = DAG.getBitcast(VT, NewBV);
25572 return NewBV;
25573 }
25574 }
25575
25576 // Simplify source operands based on shuffle mask.
25577 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25578 return SDValue(N, 0);
25579
25580 // This is intentionally placed after demanded elements simplification because
25581 // it could eliminate knowledge of undef elements created by this shuffle.
25582 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25583 return ShufOp;
25584
25585 // Match shuffles that can be converted to any_vector_extend_in_reg.
25586 if (SDValue V =
25587 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25588 return V;
25589
25590 // Combine "truncate_vector_in_reg" style shuffles.
25591 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25592 return V;
25593
25594 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25595 Level < AfterLegalizeVectorOps &&
25596 (N1.isUndef() ||
25597 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25598 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25599 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25600 return V;
25601 }
25602
25603 // A shuffle of a concat of the same narrow vector can be reduced to use
25604 // only low-half elements of a concat with undef:
25605 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
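// e.g. with X = v2f32:
//   shuf (concat X, X), undef, <2,3,0,1> --> shuf (concat X, undef), undef, <0,1,0,1>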
25606 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25607 N0.getNumOperands() == 2 &&
25608 N0.getOperand(0) == N0.getOperand(1)) {
25609 int HalfNumElts = (int)NumElts / 2;
25610 SmallVector<int, 8> NewMask;
25611 for (unsigned i = 0; i != NumElts; ++i) {
25612 int Idx = SVN->getMaskElt(i);
25613 if (Idx >= HalfNumElts) {
25614 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25615 Idx -= HalfNumElts;
25616 }
25617 NewMask.push_back(Idx);
25618 }
25619 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25620 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25621 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25622 N0.getOperand(0), UndefVec);
25623 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25624 }
25625 }
25626
25627 // See if we can replace a shuffle with an insert_subvector.
25628 // e.g. v2i32 into v8i32:
25629 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25630 // --> insert_subvector(lhs,rhs1,4).
25631 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25632 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25633 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25634 // Ensure RHS subvectors are legal.
25635 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25636 EVT SubVT = RHS.getOperand(0).getValueType();
25637 int NumSubVecs = RHS.getNumOperands();
25638 int NumSubElts = SubVT.getVectorNumElements();
25639 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25640 if (!TLI.isTypeLegal(SubVT))
25641 return SDValue();
25642
25643 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25644 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25645 return SDValue();
25646
25647 // Search [NumSubElts] spans for RHS sequence.
25648 // TODO: Can we avoid nested loops to increase performance?
25649 SmallVector<int> InsertionMask(NumElts);
25650 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25651 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25652 // Reset mask to identity.
25653 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25654
25655 // Add subvector insertion.
25656 std::iota(InsertionMask.begin() + SubIdx,
25657 InsertionMask.begin() + SubIdx + NumSubElts,
25658 NumElts + (SubVec * NumSubElts));
25659
25660 // See if the shuffle mask matches the reference insertion mask.
25661 bool MatchingShuffle = true;
25662 for (int i = 0; i != (int)NumElts; ++i) {
25663 int ExpectIdx = InsertionMask[i];
25664 int ActualIdx = Mask[i];
25665 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25666 MatchingShuffle = false;
25667 break;
25668 }
25669 }
25670
25671 if (MatchingShuffle)
25672 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25673 RHS.getOperand(SubVec),
25674 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25675 }
25676 }
25677 return SDValue();
25678 };
25679 ArrayRef<int> Mask = SVN->getMask();
25680 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25681 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25682 return InsertN1;
25683 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25684 SmallVector<int> CommuteMask(Mask);
25685 ShuffleVectorSDNode::commuteShuffleMask(CommuteMask, NumElts);
25686 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25687 return InsertN0;
25688 }
25689 }
25690
25691 // If we're not performing a select/blend shuffle, see if we can convert the
25692 // shuffle into an AND node, where all the out-of-lane elements are known zero.
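// e.g. shuffle X, Y, <0,6,2,7> --> and X, <-1,0,-1,0> when elements 2 and 3 of
// Y are known to be zero.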
25693 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25694 bool IsInLaneMask = true;
25695 ArrayRef<int> Mask = SVN->getMask();
25696 SmallVector<int, 16> ClearMask(NumElts, -1);
25697 APInt DemandedLHS = APInt::getZero(NumElts);
25698 APInt DemandedRHS = APInt::getZero(NumElts);
25699 for (int I = 0; I != (int)NumElts; ++I) {
25700 int M = Mask[I];
25701 if (M < 0)
25702 continue;
25703 ClearMask[I] = M == I ? I : (I + NumElts);
25704 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25705 if (M != I) {
25706 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25707 Demanded.setBit(M % NumElts);
25708 }
25709 }
25710 // TODO: Should we try to mask with N1 as well?
25711 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25712 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25713 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25714 SDLoc DL(N);
25715 EVT IntVT = VT.changeVectorElementTypeToInteger();
25716 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25717 // Transform the type to a legal type so that the buildvector constant
25718 // elements are not illegal. Make sure that the result is larger than the
25719 // original type, in case the value is split into two (e.g. i64->i32).
25720 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25721 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25722 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25723 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25724 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25725 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25726 for (int I = 0; I != (int)NumElts; ++I)
25727 if (0 <= Mask[I])
25728 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25729
25730 // See if a clear mask is legal instead of going via
25731 // XformToShuffleWithZero which loses UNDEF mask elements.
25732 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25733 return DAG.getBitcast(
25734 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25735 DAG.getConstant(0, DL, IntVT), ClearMask));
25736
25737 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25738 return DAG.getBitcast(
25739 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25740 DAG.getBuildVector(IntVT, DL, AndMask)));
25741 }
25742 }
25743 }
25744
25745 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25746 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25747 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25748 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25749 return Res;
25750
25751 // If this shuffle only has a single input that is a bitcasted shuffle,
25752 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25753 // back to their original types.
25754 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25755 N1.isUndef() && Level < AfterLegalizeVectorOps &&
25756 TLI.isTypeLegal(VT)) {
25757
25758 SDValue BC0 = peekThroughOneUseBitcasts(N0);
25759 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25760 EVT SVT = VT.getScalarType();
25761 EVT InnerVT = BC0->getValueType(0);
25762 EVT InnerSVT = InnerVT.getScalarType();
25763
25764 // Determine which shuffle works with the smaller scalar type.
25765 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25766 EVT ScaleSVT = ScaleVT.getScalarType();
25767
25768 if (TLI.isTypeLegal(ScaleVT) &&
25769 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25770 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25771 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25772 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25773
25774 // Scale the shuffle masks to the smaller scalar type.
25775 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25776 SmallVector<int, 8> InnerMask;
25777 SmallVector<int, 8> OuterMask;
25778 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25779 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25780
25781 // Merge the shuffle masks.
25782 SmallVector<int, 8> NewMask;
25783 for (int M : OuterMask)
25784 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25785
25786 // Test for shuffle mask legality over both commutations.
25787 SDValue SV0 = BC0->getOperand(0);
25788 SDValue SV1 = BC0->getOperand(1);
25789 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25790 if (!LegalMask) {
25791 std::swap(SV0, SV1);
25792 ShuffleVectorSDNode::commuteShuffleMask(NewMask, NewMask.size());
25793 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25794 }
25795
25796 if (LegalMask) {
25797 SV0 = DAG.getBitcast(ScaleVT, SV0);
25798 SV1 = DAG.getBitcast(ScaleVT, SV1);
25799 return DAG.getBitcast(
25800 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25801 }
25802 }
25803 }
25804 }
25805
25806 // Match shuffles of bitcasts, so long as the mask can be treated as the
25807 // larger type.
25808 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25809 return V;
25810
25811 // Compute the combined shuffle mask for a shuffle with SV0 as the first
25812 // operand, and SV1 as the second operand.
25813 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25814 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
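// e.g. for 4 elements, merging shuffle (shuffle A, B, <0,0,1,1>), C, <0,1,4,5>
// yields SV0 = A, SV1 = C and Mask = <0,0,4,5>.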
25815 auto MergeInnerShuffle =
25816 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25817 ShuffleVectorSDNode *OtherSVN, SDValue N1,
25818 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25819 SmallVectorImpl<int> &Mask) -> bool {
25820 // Don't try to fold splats; they're likely to simplify somehow, or they
25821 // might be free.
25822 if (OtherSVN->isSplat())
25823 return false;
25824
25825 SV0 = SV1 = SDValue();
25826 Mask.clear();
25827
25828 for (unsigned i = 0; i != NumElts; ++i) {
25829 int Idx = SVN->getMaskElt(i);
25830 if (Idx < 0) {
25831 // Propagate Undef.
25832 Mask.push_back(Idx);
25833 continue;
25834 }
25835
25836 if (Commute)
25837 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25838
25839 SDValue CurrentVec;
25840 if (Idx < (int)NumElts) {
25841 // This shuffle index refers to the inner shuffle N0. Lookup the inner
25842 // shuffle mask to identify which vector is actually referenced.
25843 Idx = OtherSVN->getMaskElt(Idx);
25844 if (Idx < 0) {
25845 // Propagate Undef.
25846 Mask.push_back(Idx);
25847 continue;
25848 }
25849 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25850 : OtherSVN->getOperand(1);
25851 } else {
25852 // This shuffle index references an element within N1.
25853 CurrentVec = N1;
25854 }
25855
25856 // Simple case where 'CurrentVec' is UNDEF.
25857 if (CurrentVec.isUndef()) {
25858 Mask.push_back(-1);
25859 continue;
25860 }
25861
25862 // Canonicalize the shuffle index. We don't know yet if CurrentVec
25863 // will be the first or second operand of the combined shuffle.
25864 Idx = Idx % NumElts;
25865 if (!SV0.getNode() || SV0 == CurrentVec) {
25866 // Ok. CurrentVec is the left hand side.
25867 // Update the mask accordingly.
25868 SV0 = CurrentVec;
25869 Mask.push_back(Idx);
25870 continue;
25871 }
25872 if (!SV1.getNode() || SV1 == CurrentVec) {
25873 // Ok. CurrentVec is the right hand side.
25874 // Update the mask accordingly.
25875 SV1 = CurrentVec;
25876 Mask.push_back(Idx + NumElts);
25877 continue;
25878 }
25879
25880 // Last chance - see if the vector is another shuffle and if it
25881 // uses one of the existing candidate shuffle ops.
25882 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25883 int InnerIdx = CurrentSVN->getMaskElt(Idx);
25884 if (InnerIdx < 0) {
25885 Mask.push_back(-1);
25886 continue;
25887 }
25888 SDValue InnerVec = (InnerIdx < (int)NumElts)
25889 ? CurrentSVN->getOperand(0)
25890 : CurrentSVN->getOperand(1);
25891 if (InnerVec.isUndef()) {
25892 Mask.push_back(-1);
25893 continue;
25894 }
25895 InnerIdx %= NumElts;
25896 if (InnerVec == SV0) {
25897 Mask.push_back(InnerIdx);
25898 continue;
25899 }
25900 if (InnerVec == SV1) {
25901 Mask.push_back(InnerIdx + NumElts);
25902 continue;
25903 }
25904 }
25905
25906 // Bail out if we cannot convert the shuffle pair into a single shuffle.
25907 return false;
25908 }
25909
25910 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25911 return true;
25912
25913 // Avoid introducing shuffles with illegal mask.
25914 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25915 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25916 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25917 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25918 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25919 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25920 if (TLI.isShuffleMaskLegal(Mask, VT))
25921 return true;
25922
25923 std::swap(SV0, SV1);
25924 ShuffleVectorSDNode::commuteShuffleMask(Mask, NumElts);
25925 return TLI.isShuffleMaskLegal(Mask, VT);
25926 };
25927
25928 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25929 // Canonicalize shuffles according to rules:
25930 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25931 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25932 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25933 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25934 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25935 // The incoming shuffle must be of the same type as the result of the
25936 // current shuffle.
25937 assert(N1->getOperand(0).getValueType() == VT &&
25938 "Shuffle types don't match");
25939
25940 SDValue SV0 = N1->getOperand(0);
25941 SDValue SV1 = N1->getOperand(1);
25942 bool HasSameOp0 = N0 == SV0;
25943 bool IsSV1Undef = SV1.isUndef();
25944 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
25945 // Commute the operands of this shuffle so merging below will trigger.
25946 return DAG.getCommutedVectorShuffle(*SVN);
25947 }
25948
25949 // Canonicalize splat shuffles to the RHS to improve merging below.
25950 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
25951 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
25952 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25953 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
25954 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
25955 return DAG.getCommutedVectorShuffle(*SVN);
25956 }
25957
25958 // Try to fold according to rules:
25959 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25960 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25961 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25962 // Don't try to fold shuffles with illegal type.
25963 // Only fold if this shuffle is the only user of the other shuffle.
25964 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
25965 for (int i = 0; i != 2; ++i) {
25966 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
25967 N->isOnlyUserOf(N->getOperand(i).getNode())) {
25968 // The incoming shuffle must be of the same type as the result of the
25969 // current shuffle.
25970 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
25971 assert(OtherSV->getOperand(0).getValueType() == VT &&
25972 "Shuffle types don't match");
25973
25974 SDValue SV0, SV1;
25975 SmallVector<int, 4> Mask;
25976 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
25977 SV0, SV1, Mask)) {
25978 // Check if all indices in Mask are Undef. In case, propagate Undef.
25979 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25980 return DAG.getUNDEF(VT);
25981
25982 return DAG.getVectorShuffle(VT, SDLoc(N),
25983 SV0 ? SV0 : DAG.getUNDEF(VT),
25984 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
25985 }
25986 }
25987 }
25988
25989 // Merge shuffles through binops if we are able to merge them with at least
25990 // one other shuffle.
25991 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
25992 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
25993 unsigned SrcOpcode = N0.getOpcode();
25994 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
25995 (N1.isUndef() ||
25996 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
25997 // Get binop source ops, or just pass on the undef.
25998 SDValue Op00 = N0.getOperand(0);
25999 SDValue Op01 = N0.getOperand(1);
26000 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26001 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26002 // TODO: We might be able to relax the VT check but we don't currently
26003 // have any isBinOp() that has different result/ops VTs so play safe until
26004 // we have test coverage.
26005 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26006 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26007 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26008 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26009 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26010 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26011 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26012 SmallVectorImpl<int> &Mask, bool LeftOp,
26013 bool Commute) {
26014 SDValue InnerN = Commute ? N1 : N0;
26015 SDValue Op0 = LeftOp ? Op00 : Op01;
26016 SDValue Op1 = LeftOp ? Op10 : Op11;
26017 if (Commute)
26018 std::swap(Op0, Op1);
26019 // Only accept the merged shuffle if we don't introduce undef elements,
26020 // or the inner shuffle already contained undef elements.
26021 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26022 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26023 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26024 Mask) &&
26025 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26026 llvm::none_of(Mask, [](int M) { return M < 0; }));
26027 };
26028
26029 // Ensure we don't increase the number of shuffles - we must merge a
26030 // shuffle from at least one of the LHS and RHS ops.
26031 bool MergedLeft = false;
26032 SDValue LeftSV0, LeftSV1;
26033 SmallVector<int, 4> LeftMask;
26034 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26035 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26036 MergedLeft = true;
26037 } else {
26038 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26039 LeftSV0 = Op00, LeftSV1 = Op10;
26040 }
26041
26042 bool MergedRight = false;
26043 SDValue RightSV0, RightSV1;
26044 SmallVector<int, 4> RightMask;
26045 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26046 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26047 MergedRight = true;
26048 } else {
26049 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26050 RightSV0 = Op01, RightSV1 = Op11;
26051 }
26052
26053 if (MergedLeft || MergedRight) {
26054 SDLoc DL(N);
26055 SDValue LHS = DAG.getVectorShuffle(
26056 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26057 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26058 SDValue RHS = DAG.getVectorShuffle(
26059 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26060 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26061 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26062 }
26063 }
26064 }
26065 }
26066
26067 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26068 return V;
26069
26070 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26071 // Perform this really late, because it could eliminate knowledge
26072 // of undef elements created by this shuffle.
26073 if (Level < AfterLegalizeTypes)
26074 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26075 LegalOperations))
26076 return V;
26077
26078 return SDValue();
26079}
26080
26081SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26082 EVT VT = N->getValueType(0);
26083 if (!VT.isFixedLengthVector())
26084 return SDValue();
26085
26086 // Try to convert a scalar binop with an extracted vector element to a vector
26087 // binop. This is intended to reduce potentially expensive register moves.
26088 // TODO: Check if both operands are extracted.
26089 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26090 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26091 SDValue Scalar = N->getOperand(0);
26092 unsigned Opcode = Scalar.getOpcode();
26093 EVT VecEltVT = VT.getScalarType();
26094 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26095 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26096 Scalar.getOperand(0).getValueType() == VecEltVT &&
26097 Scalar.getOperand(1).getValueType() == VecEltVT &&
26098 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26099 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26100 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26101 // Match an extract element and get a shuffle mask equivalent.
26102 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26103
26104 for (int i : {0, 1}) {
26105 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26106 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26107 SDValue EE = Scalar.getOperand(i);
26108 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26109 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26110 EE.getOperand(0).getValueType() == VT &&
26111 isa<ConstantSDNode>(EE.getOperand(1))) {
26112 // Mask = {ExtractIndex, undef, undef....}
26113 ShufMask[0] = EE.getConstantOperandVal(1);
26114 // Make sure the shuffle is legal if we are crossing lanes.
26115 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26116 SDLoc DL(N);
26117 SDValue V[] = {EE.getOperand(0),
26118 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26119 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26120 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26121 ShufMask);
26122 }
26123 }
26124 }
26125 }
26126
26127 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26128 // with a VECTOR_SHUFFLE and possible truncate.
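// e.g. v4i32: scalar_to_vector (extractelt V, 2) --> shuffle V, undef,
// <2,-1,-1,-1>, followed by an extract_subvector if the result is narrower.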
26129 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26130 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26131 return SDValue();
26132
26133 // If we have an implicit truncate, truncate here if it is legal.
26134 if (VecEltVT != Scalar.getValueType() &&
26135 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26136 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26137 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26138 }
26139
26140 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26141 if (!ExtIndexC)
26142 return SDValue();
26143
26144 SDValue SrcVec = Scalar.getOperand(0);
26145 EVT SrcVT = SrcVec.getValueType();
26146 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26147 unsigned VTNumElts = VT.getVectorNumElements();
26148 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26149 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26150 SmallVector<int, 8> Mask(SrcNumElts, -1);
26151 Mask[0] = ExtIndexC->getZExtValue();
26152 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26153 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26154 if (!LegalShuffle)
26155 return SDValue();
26156
26157 // If the initial vector is the same size, the shuffle is the result.
26158 if (VT == SrcVT)
26159 return LegalShuffle;
26160
26161 // If not, shorten the shuffled vector.
26162 if (VTNumElts != SrcNumElts) {
26163 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26164 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26165 SrcVT.getVectorElementType(), VTNumElts);
26166 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26167 ZeroIdx);
26168 }
26169 }
26170
26171 return SDValue();
26172}
26173
26174SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26175 EVT VT = N->getValueType(0);
26176 SDValue N0 = N->getOperand(0);
26177 SDValue N1 = N->getOperand(1);
26178 SDValue N2 = N->getOperand(2);
26179 uint64_t InsIdx = N->getConstantOperandVal(2);
26180
26181 // If inserting an UNDEF, just return the original vector.
26182 if (N1.isUndef())
26183 return N0;
26184
26185 // If this is an insert of an extracted vector into an undef vector, we can
26186 // just use the input to the extract if the types match, and can simplify
26187 // in some cases even if they don't.
26188 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26189 N1.getOperand(1) == N2) {
26190 EVT SrcVT = N1.getOperand(0).getValueType();
26191 if (SrcVT == VT)
26192 return N1.getOperand(0);
26193 // TODO: To remove the zero check, need to adjust the offset to
26194 // a multiple of the new src type.
26195 if (isNullConstant(N2) &&
26196 VT.isScalableVector() == SrcVT.isScalableVector()) {
26197 if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26198 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26199 VT, N0, N1.getOperand(0), N2);
26200 else
26201 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26202 VT, N1.getOperand(0), N2);
26203 }
26204 }
26205
26206 // Handle case where we've ended up inserting back into the source vector
26207 // we extracted the subvector from.
26208 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26209 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26210 N1.getOperand(1) == N2)
26211 return N0;
26212
26213 // Simplify scalar inserts into an undef vector:
26214 // insert_subvector undef, (splat X), N2 -> splat X
26215 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26216 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26217 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26218
26219 // If we are inserting a bitcast value into an undef, with the same
26220 // number of elements, just use the bitcast input of the extract.
26221 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26222 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26223 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26224 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26225 N1.getOperand(0).getOperand(1) == N2 &&
26226 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26227 VT.getVectorElementCount() &&
26228 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26229 VT.getSizeInBits()) {
26230 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26231 }
26232
26233 // If both N0 and N1 are bitcast values on which insert_subvector
26234 // would make sense, pull the bitcast through.
26235 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26236 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26237 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26238 SDValue CN0 = N0.getOperand(0);
26239 SDValue CN1 = N1.getOperand(0);
26240 EVT CN0VT = CN0.getValueType();
26241 EVT CN1VT = CN1.getValueType();
26242 if (CN0VT.isVector() && CN1VT.isVector() &&
26243 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26244 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26245 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26246 CN0.getValueType(), CN0, CN1, N2);
26247 return DAG.getBitcast(VT, NewINSERT);
26248 }
26249 }
26250
26251 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26252 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26253 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26254 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26255 N0.getOperand(1).getValueType() == N1.getValueType() &&
26256 N0.getOperand(2) == N2)
26257 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26258 N1, N2);
26259
26260 // Eliminate an intermediate insert into an undef vector:
26261 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26262 // insert_subvector undef, X, 0
26263 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26264 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26265 isNullConstant(N2))
26266 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26267 N1.getOperand(1), N2);
26268
26269 // Push subvector bitcasts to the output, adjusting the index as we go.
26270 // insert_subvector(bitcast(v), bitcast(s), c1)
26271 // -> bitcast(insert_subvector(v, s, c2))
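// e.g. insert_subvector (bitcast v8i32 V to v4i64), (bitcast v2i32 S to v1i64), 1
// --> bitcast (insert_subvector V, S, 2), if the target supports the new type.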
26272 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26273 N1.getOpcode() == ISD::BITCAST) {
26274 SDValue N0Src = peekThroughBitcasts(N0);
26275 SDValue N1Src = peekThroughBitcasts(N1);
26276 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26277 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26278 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26279 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26280 EVT NewVT;
26281 SDLoc DL(N);
26282 SDValue NewIdx;
26283 LLVMContext &Ctx = *DAG.getContext();
26284 ElementCount NumElts = VT.getVectorElementCount();
26285 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26286 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26287 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26288 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26289 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26290 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26291 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26292 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26293 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26294 NumElts.divideCoefficientBy(Scale));
26295 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26296 }
26297 }
26298 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26299 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26300 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26301 return DAG.getBitcast(VT, Res);
26302 }
26303 }
26304 }
26305
26306 // Canonicalize insert_subvector dag nodes.
26307 // Example:
26308 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26309 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26310 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26311 N1.getValueType() == N0.getOperand(1).getValueType()) {
26312 unsigned OtherIdx = N0.getConstantOperandVal(2);
26313 if (InsIdx < OtherIdx) {
26314 // Swap nodes.
26315 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26316 N0.getOperand(0), N1, N2);
26317 AddToWorklist(NewOp.getNode());
26318 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26319 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26320 }
26321 }
26322
26323 // If the input vector is a concatenation, and the insert replaces
26324 // one of the pieces, we can optimize into a single concat_vectors.
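// e.g. insert_subvector (concat A, B, C, D), X, 2*NumElts(X) --> concat A, B, X, D
// when X has the same type as the concat operands.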
26325 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26326 N0.getOperand(0).getValueType() == N1.getValueType() &&
26327 N0.getOperand(0).getValueType().isScalableVector() ==
26328 N1.getValueType().isScalableVector()) {
26329 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26330 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26331 Ops[InsIdx / Factor] = N1;
26332 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26333 }
26334
26335 // Simplify source operands based on insertion.
26336 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26337 return SDValue(N, 0);
26338
26339 return SDValue();
26340}
26341
26342SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26343 SDValue N0 = N->getOperand(0);
26344
26345 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26346 if (N0->getOpcode() == ISD::FP16_TO_FP)
26347 return N0->getOperand(0);
26348
26349 return SDValue();
26350}
26351
26352SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26353 auto Op = N->getOpcode();
26354 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
26355 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26356 SDValue N0 = N->getOperand(0);
26357
26358 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26359 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26360 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26361 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26362 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26363 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26364 }
26365 }
26366
26367 return SDValue();
26368}
26369
26370SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26371 SDValue N0 = N->getOperand(0);
26372
26373 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26374 if (N0->getOpcode() == ISD::BF16_TO_FP)
26375 return N0->getOperand(0);
26376
26377 return SDValue();
26378}
26379
26380SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26381 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26382 return visitFP16_TO_FP(N);
26383}
26384
26385SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26386 SDValue N0 = N->getOperand(0);
26387 EVT VT = N0.getValueType();
26388 unsigned Opcode = N->getOpcode();
26389
26390 // VECREDUCE over 1-element vector is just an extract.
26391 if (VT.getVectorElementCount().isScalar()) {
26392 SDLoc dl(N);
26393 SDValue Res =
26394 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26395 DAG.getVectorIdxConstant(0, dl));
26396 if (Res.getValueType() != N->getValueType(0))
26397 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26398 return Res;
26399 }
26400
26401 // On a boolean vector, an and/or reduction is the same as a umin/umax
26402 // reduction. Convert them if the latter is legal while the former isn't.
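// (For elements known to be 0 or all-ones, AND matches UMIN and OR matches
// UMAX elementwise, so the reduced results agree.)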
26403 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26404 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26405 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26406 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26407 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26408 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26409 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26410 }
26411
26412 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26413 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26414 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26415 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26416 SDValue Vec = N0.getOperand(0);
26417 SDValue Subvec = N0.getOperand(1);
26418 if ((Opcode == ISD::VECREDUCE_OR &&
26419 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26420 (Opcode == ISD::VECREDUCE_AND &&
26421 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26422 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26423 }
26424
26425 return SDValue();
26426}
26427
26428SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26429 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26430
26431 // FSUB -> FMA combines:
26432 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26433 AddToWorklist(Fused.getNode());
26434 return Fused;
26435 }
26436 return SDValue();
26437}
26438
26439SDValue DAGCombiner::visitVPOp(SDNode *N) {
26440
26441 if (N->getOpcode() == ISD::VP_GATHER)
26442 if (SDValue SD = visitVPGATHER(N))
26443 return SD;
26444
26445 if (N->getOpcode() == ISD::VP_SCATTER)
26446 if (SDValue SD = visitVPSCATTER(N))
26447 return SD;
26448
26449 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26450 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26451 return SD;
26452
26453 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26454 if (SDValue SD = visitVP_STRIDED_STORE(N))
26455 return SD;
26456
26457 // VP operations in which all vector elements are disabled - either by
26458 // determining that the mask is all false or that the EVL is 0 - can be
26459 // eliminated.
26460 bool AreAllEltsDisabled = false;
26461 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26462 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26463 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26464 AreAllEltsDisabled |=
26465 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26466
26467 // This is the only generic VP combine we support for now.
26468 if (!AreAllEltsDisabled) {
26469 switch (N->getOpcode()) {
26470 case ISD::VP_FADD:
26471 return visitVP_FADD(N);
26472 case ISD::VP_FSUB:
26473 return visitVP_FSUB(N);
26474 case ISD::VP_FMA:
26475 return visitFMA<VPMatchContext>(N);
26476 case ISD::VP_SELECT:
26477 return visitVP_SELECT(N);
26478 }
26479 return SDValue();
26480 }
26481
26482 // Binary operations can be replaced by UNDEF.
26483 if (ISD::isVPBinaryOp(N->getOpcode()))
26484 return DAG.getUNDEF(N->getValueType(0));
26485
26486 // VP Memory operations can be replaced by either the chain (stores) or the
26487 // chain + undef (loads).
26488 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26489 if (MemSD->writeMem())
26490 return MemSD->getChain();
26491 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26492 }
26493
26494 // Reduction operations return the start operand when no elements are active.
26495 if (ISD::isVPReduction(N->getOpcode()))
26496 return N->getOperand(0);
26497
26498 return SDValue();
26499}
26500
26501SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26502 SDValue Chain = N->getOperand(0);
26503 SDValue Ptr = N->getOperand(1);
26504 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26505
26506 // Check if the memory where the FP state is written is used only in a
26507 // single load operation.
26508 LoadSDNode *LdNode = nullptr;
26509 for (auto *U : Ptr->uses()) {
26510 if (U == N)
26511 continue;
26512 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26513 if (LdNode && LdNode != Ld)
26514 return SDValue();
26515 LdNode = Ld;
26516 continue;
26517 }
26518 return SDValue();
26519 }
26520 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26521 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26522 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26523 return SDValue();
26524
26525 // Check if the loaded value is used only in a store operation.
26526 StoreSDNode *StNode = nullptr;
26527 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26528 SDUse &U = I.getUse();
26529 if (U.getResNo() == 0) {
26530 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26531 if (StNode)
26532 return SDValue();
26533 StNode = St;
26534 } else {
26535 return SDValue();
26536 }
26537 }
26538 }
26539 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26540 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26541 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26542 return SDValue();
26543
26544 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26545 // environment.
26546 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26547 StNode->getMemOperand());
26548 CombineTo(StNode, Res, false);
26549 return Res;
26550}
26551
26552SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26553 SDValue Chain = N->getOperand(0);
26554 SDValue Ptr = N->getOperand(1);
26555 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26556
26557 // Check if the address of the FP state is also used only in a store operation.
26558 StoreSDNode *StNode = nullptr;
26559 for (auto *U : Ptr->uses()) {
26560 if (U == N)
26561 continue;
26562 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26563 if (StNode && StNode != St)
26564 return SDValue();
26565 StNode = St;
26566 continue;
26567 }
26568 return SDValue();
26569 }
26570 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26571 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26572 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26573 return SDValue();
26574
26575 // Check if the stored value is loaded from some location and the loaded
26576 // value is used only in the store operation.
26577 SDValue StValue = StNode->getValue();
26578 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26579 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26580 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26581 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26582 return SDValue();
26583
26584 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26585 // environment.
26586 SDValue Res =
26587 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26588 LdNode->getMemOperand());
26589 return Res;
26590}
26591
26592 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26593/// with the destination vector and a zero vector.
26594/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26595/// vector_shuffle V, Zero, <0, 4, 2, 4>
26596SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26597 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26598
26599 EVT VT = N->getValueType(0);
26600 SDValue LHS = N->getOperand(0);
26601 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26602 SDLoc DL(N);
26603
26604 // Make sure we're not running after operation legalization where it
26605 // may have custom lowered the vector shuffles.
26606 if (LegalOperations)
26607 return SDValue();
26608
26609 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26610 return SDValue();
26611
26612 EVT RVT = RHS.getValueType();
26613 unsigned NumElts = RHS.getNumOperands();
26614
26615 // Attempt to create a valid clear mask, splitting the mask into
26616 // sub elements and checking to see if each is
26617 // all zeros or all ones - suitable for shuffle masking.
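// e.g. (little-endian) v2i64: AND V, <0x00000000FFFFFFFF, -1> splits into the
// i32 clear mask <0,5,2,3> applied to bitcast(V) and a zero vector.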
26618 auto BuildClearMask = [&](int Split) {
26619 int NumSubElts = NumElts * Split;
26620 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26621
26622 SmallVector<int, 8> Indices;
26623 for (int i = 0; i != NumSubElts; ++i) {
26624 int EltIdx = i / Split;
26625 int SubIdx = i % Split;
26626 SDValue Elt = RHS.getOperand(EltIdx);
26627 // X & undef --> 0 (not undef). So this lane must be converted to choose
26628 // from the zero constant vector (same as if the element had all 0-bits).
26629 if (Elt.isUndef()) {
26630 Indices.push_back(i + NumSubElts);
26631 continue;
26632 }
26633
26634 APInt Bits;
26635 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26636 Bits = Cst->getAPIntValue();
26637 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26638 Bits = CstFP->getValueAPF().bitcastToAPInt();
26639 else
26640 return SDValue();
26641
26642 // Extract the sub element from the constant bit mask.
26643 if (DAG.getDataLayout().isBigEndian())
26644 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26645 else
26646 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26647
26648 if (Bits.isAllOnes())
26649 Indices.push_back(i);
26650 else if (Bits == 0)
26651 Indices.push_back(i + NumSubElts);
26652 else
26653 return SDValue();
26654 }
26655
26656 // Let's see if the target supports this vector_shuffle.
26657 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26658 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26659 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26660 return SDValue();
26661
26662 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26663 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26664 DAG.getBitcast(ClearVT, LHS),
26665 Zero, Indices));
26666 };
26667
26668 // Determine maximum split level (byte level masking).
26669 int MaxSplit = 1;
26670 if (RVT.getScalarSizeInBits() % 8 == 0)
26671 MaxSplit = RVT.getScalarSizeInBits() / 8;
26672
26673 for (int Split = 1; Split <= MaxSplit; ++Split)
26674 if (RVT.getScalarSizeInBits() % Split == 0)
26675 if (SDValue S = BuildClearMask(Split))
26676 return S;
26677
26678 return SDValue();
26679}
26680
26681/// If a vector binop is performed on splat values, it may be profitable to
26682/// extract, scalarize, and insert/splat.
26683static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26684 const SDLoc &DL) {
26685 SDValue N0 = N->getOperand(0);
26686 SDValue N1 = N->getOperand(1);
26687 unsigned Opcode = N->getOpcode();
26688 EVT VT = N->getValueType(0);
26689 EVT EltVT = VT.getVectorElementType();
26690 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26691
26692 // TODO: Remove/replace the extract cost check? If the elements are available
26693 // as scalars, then there may be no extract cost. Should we ask if
26694 // inserting a scalar back into a vector is cheap instead?
26695 int Index0, Index1;
26696 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26697 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26698 // Extract element from splat_vector should be free.
26699 // TODO: use DAG.isSplatValue instead?
26700 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26701 N1.getOpcode() == ISD::SPLAT_VECTOR;
26702 if (!Src0 || !Src1 || Index0 != Index1 ||
26703 Src0.getValueType().getVectorElementType() != EltVT ||
26704 Src1.getValueType().getVectorElementType() != EltVT ||
26705 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26706 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26707 return SDValue();
26708
26709 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26710 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26711 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26712 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26713
26714 // If all lanes but 1 are undefined, no need to splat the scalar result.
26715 // TODO: Keep track of undefs and use that info in the general case.
26716 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26717 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26718 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26719 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26720 // build_vec ..undef, (bo X, Y), undef...
26721 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
26722 Ops[Index0] = ScalarBO;
26723 return DAG.getBuildVector(VT, DL, Ops);
26724 }
26725
26726 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26727 return DAG.getSplat(VT, DL, ScalarBO);
26728}
26729
26730/// Visit a vector cast operation, like FP_EXTEND.
26731SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26732 EVT VT = N->getValueType(0);
26733 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26734 EVT EltVT = VT.getVectorElementType();
26735 unsigned Opcode = N->getOpcode();
26736
26737 SDValue N0 = N->getOperand(0);
26738 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26739
26740 // TODO: promote operation might be also good here?
26741 int Index0;
26742 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26743 if (Src0 &&
26744 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26745 TLI.isExtractVecEltCheap(VT, Index0)) &&
26746 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26747 TLI.preferScalarizeSplat(N)) {
26748 EVT SrcVT = N0.getValueType();
26749 EVT SrcEltVT = SrcVT.getVectorElementType();
26750 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26751 SDValue Elt =
26752 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26753 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26754 if (VT.isScalableVector())
26755 return DAG.getSplatVector(VT, DL, ScalarBO);
26756 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26757 return DAG.getBuildVector(VT, DL, Ops);
26758 }
26759
26760 return SDValue();
26761}
26762
26763/// Visit a binary vector operation, like ADD.
26764SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26765 EVT VT = N->getValueType(0);
26766 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26767
26768 SDValue LHS = N->getOperand(0);
26769 SDValue RHS = N->getOperand(1);
26770 unsigned Opcode = N->getOpcode();
26771 SDNodeFlags Flags = N->getFlags();
26772
26773 // Move unary shuffles with identical masks after a vector binop:
26774 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
26775 // --> shuffle (VBinOp A, B), Undef, Mask
26776 // This does not require type legality checks because we are creating the
26777 // same types of operations that are in the original sequence. We do have to
26778 // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
26779 // though. This code is adapted from the identical transform in instcombine.
26780 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26781 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26782 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26783 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26784 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26785 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26786 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26787 RHS.getOperand(0), Flags);
26788 SDValue UndefV = LHS.getOperand(1);
26789 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26790 }
26791
26792 // Try to sink a splat shuffle after a binop with a uniform constant.
26793 // This is limited to cases where neither the shuffle nor the constant have
26794 // undefined elements because that could be poison-unsafe or inhibit
26795 // demanded elements analysis. It is further limited to not change a splat
26796 // of an inserted scalar because that may be optimized better by
26797 // load-folding or other target-specific behaviors.
26798 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26799 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26800 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26801 // binop (splat X), (splat C) --> splat (binop X, C)
26802 SDValue X = Shuf0->getOperand(0);
26803 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26804 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26805 Shuf0->getMask());
26806 }
26807 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26808 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26809 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26810 // binop (splat C), (splat X) --> splat (binop C, X)
26811 SDValue X = Shuf1->getOperand(0);
26812 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26813 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26814 Shuf1->getMask());
26815 }
26816 }
26817
26818 // The following pattern is likely to emerge with vector reduction ops. Moving
26819 // the binary operation ahead of insertion may allow using a narrower vector
26820 // instruction that has better performance than the wide version of the op:
26821 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
26822 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26823 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26824 LHS.getOperand(2) == RHS.getOperand(2) &&
26825 (LHS.hasOneUse() || RHS.hasOneUse())) {
26826 SDValue X = LHS.getOperand(1);
26827 SDValue Y = RHS.getOperand(1);
26828 SDValue Z = LHS.getOperand(2);
26829 EVT NarrowVT = X.getValueType();
26830 if (NarrowVT == Y.getValueType() &&
26831 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26832 LegalOperations)) {
26833 // (binop undef, undef) may not return undef, so compute that result.
26834 SDValue VecC =
26835 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26836 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26837 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26838 }
26839 }
26840
26841 // Make sure all but the first op are undef or constant.
26842 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26843 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26844 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26845 return Op.isUndef() ||
26846 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26847 });
26848 };
26849
26850 // The following pattern is likely to emerge with vector reduction ops. Moving
26851 // the binary operation ahead of the concat may allow using a narrower vector
26852 // instruction that has better performance than the wide version of the op:
26853 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26854 // concat (VBinOp X, Y), VecC
26855 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26856 (LHS.hasOneUse() || RHS.hasOneUse())) {
26857 EVT NarrowVT = LHS.getOperand(0).getValueType();
26858 if (NarrowVT == RHS.getOperand(0).getValueType() &&
26859 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26860 unsigned NumOperands = LHS.getNumOperands();
26861 SmallVector<SDValue, 4> ConcatOps;
26862 for (unsigned i = 0; i != NumOperands; ++i) {
26863 // This constant-folds for operands 1 and up.
26864 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26865 RHS.getOperand(i)));
26866 }
26867
26868 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26869 }
26870 }
26871
26872 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26873 return V;
26874
26875 return SDValue();
26876}
26877
26878SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26879 SDValue N2) {
26880 assert(N0.getOpcode() == ISD::SETCC &&
26881 "First argument must be a SetCC node!");
26882
26883 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26884 cast<CondCodeSDNode>(N0.getOperand(2))->get());
26885
26886 // If we got a simplified select_cc node back from SimplifySelectCC, then
26887 // break it down into a new SETCC node, and a new SELECT node, and then return
26888 // the SELECT node, since we were called with a SELECT node.
26889 if (SCC.getNode()) {
26890 // Check to see if we got a select_cc back (to turn into setcc/select).
26891 // Otherwise, just return whatever node we got back, like fabs.
26892 if (SCC.getOpcode() == ISD::SELECT_CC) {
26893 const SDNodeFlags Flags = N0->getFlags();
26894 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26895 N0.getValueType(),
26896 SCC.getOperand(0), SCC.getOperand(1),
26897 SCC.getOperand(4), Flags);
26898 AddToWorklist(SETCC.getNode());
26899 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26900 SCC.getOperand(2), SCC.getOperand(3));
26901 SelectNode->setFlags(Flags);
26902 return SelectNode;
26903 }
26904
26905 return SCC;
26906 }
26907 return SDValue();
26908}
26909
26910/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26911/// being selected between, see if we can simplify the select. Callers of this
26912/// should assume that TheSelect is deleted if this returns true. As such, they
26913/// should return the appropriate thing (e.g. the node) back to the top-level of
26914/// the DAG combiner loop to avoid it being looked at.
26915bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26916 SDValue RHS) {
26917 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26918 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26919 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26920 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26921 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26922 SDValue Sqrt = RHS;
26923 ISD::CondCode CC;
26924 SDValue CmpLHS;
26925 const ConstantFPSDNode *Zero = nullptr;
26926
26927 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26928 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26929 CmpLHS = TheSelect->getOperand(0);
26930 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26931 } else {
26932 // SELECT or VSELECT
26933 SDValue Cmp = TheSelect->getOperand(0);
26934 if (Cmp.getOpcode() == ISD::SETCC) {
26935 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26936 CmpLHS = Cmp.getOperand(0);
26937 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
26938 }
26939 }
26940 if (Zero && Zero->isZero() &&
26941 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
26942 CC == ISD::SETULT || CC == ISD::SETLT)) {
26943 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26944 CombineTo(TheSelect, Sqrt);
26945 return true;
26946 }
26947 }
26948 }
26949 // Cannot simplify select with vector condition
26950 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
26951
26952 // If this is a select from two identical things, try to pull the operation
26953 // through the select.
26954 if (LHS.getOpcode() != RHS.getOpcode() ||
26955 !LHS.hasOneUse() || !RHS.hasOneUse())
26956 return false;
26957
26958 // If this is a load and the token chain is identical, replace the select
26959 // of two loads with a load through a select of the address to load from.
26960 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
26961 // constants have been dropped into the constant pool.
26962 if (LHS.getOpcode() == ISD::LOAD) {
26963 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
26964 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
26965
26966 // Token chains must be identical.
26967 if (LHS.getOperand(0) != RHS.getOperand(0) ||
26968 // Do not let this transformation reduce the number of volatile loads.
26969 // Be conservative for atomics for the moment
26970 // TODO: This does appear to be legal for unordered atomics (see D66309)
26971 !LLD->isSimple() || !RLD->isSimple() ||
26972 // FIXME: If either is a pre/post inc/dec load,
26973 // we'd need to split out the address adjustment.
26974 LLD->isIndexed() || RLD->isIndexed() ||
26975 // If this is an EXTLOAD, the VT's must match.
26976 LLD->getMemoryVT() != RLD->getMemoryVT() ||
26977 // If this is an EXTLOAD, the kind of extension must match.
26978 (LLD->getExtensionType() != RLD->getExtensionType() &&
26979 // The only exception is if one of the extensions is anyext.
26980 LLD->getExtensionType() != ISD::EXTLOAD &&
26981 RLD->getExtensionType() != ISD::EXTLOAD) ||
26982 // FIXME: this discards src value information. This is
26983 // over-conservative. It would be beneficial to be able to remember
26984 // both potential memory locations. Since we are discarding
26985 // src value info, don't do the transformation if the memory
26986 // locations are not in the default address space.
26987 LLD->getPointerInfo().getAddrSpace() != 0 ||
26988 RLD->getPointerInfo().getAddrSpace() != 0 ||
26989 // We can't produce a CMOV of a TargetFrameIndex since we won't
26990 // generate the address generation required.
26991 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26992 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26993 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
26994 LLD->getBasePtr().getValueType()))
26995 return false;
26996
26997 // The loads must not depend on one another.
26998 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
26999 return false;
27000
27001 // Check that the select condition doesn't reach either load. If so,
27002 // folding this will induce a cycle into the DAG. If not, this is safe to
27003 // xform, so create a select of the addresses.
27004
27005 SmallPtrSet<const SDNode *, 32> Visited;
27006 SmallVector<const SDNode *, 16> Worklist;
27007
27008 // Always fail if LLD and RLD are not independent. TheSelect is a
27009 // predecessor to all Nodes in question so we need not search past it.
27010
27011 Visited.insert(TheSelect);
27012 Worklist.push_back(LLD);
27013 Worklist.push_back(RLD);
27014
27015 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27016 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27017 return false;
27018
27019 SDValue Addr;
27020 if (TheSelect->getOpcode() == ISD::SELECT) {
27021 // We cannot do this optimization if any pair of {RLD, LLD} is a
27022 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27023 // Loads, we only need to check if CondNode is a successor to one of the
27024 // loads. We can further avoid this if there's no use of their chain
27025 // value.
27026 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27027 Worklist.push_back(CondNode);
27028
27029 if ((LLD->hasAnyUseOfValue(1) &&
27030 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27031 (RLD->hasAnyUseOfValue(1) &&
27032 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27033 return false;
27034
27035 Addr = DAG.getSelect(SDLoc(TheSelect),
27036 LLD->getBasePtr().getValueType(),
27037 TheSelect->getOperand(0), LLD->getBasePtr(),
27038 RLD->getBasePtr());
27039 } else { // Otherwise SELECT_CC
27040 // We cannot do this optimization if any pair of {RLD, LLD} is a
27041 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27042 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27043 // one of the loads. We can further avoid this if there's no use of their
27044 // chain value.
27045
27046 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27047 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27048 Worklist.push_back(CondLHS);
27049 Worklist.push_back(CondRHS);
27050
27051 if ((LLD->hasAnyUseOfValue(1) &&
27052 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27053 (RLD->hasAnyUseOfValue(1) &&
27054 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27055 return false;
27056
27057 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27058 LLD->getBasePtr().getValueType(),
27059 TheSelect->getOperand(0),
27060 TheSelect->getOperand(1),
27061 LLD->getBasePtr(), RLD->getBasePtr(),
27062 TheSelect->getOperand(4));
27063 }
27064
27065 SDValue Load;
27066 // It is safe to replace the two loads if they have different alignments,
27067 // but the new load must be the minimum (most restrictive) alignment of the
27068 // inputs.
27069 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27070 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27071 if (!RLD->isInvariant())
27072 MMOFlags &= ~MachineMemOperand::MOInvariant;
27073 if (!RLD->isDereferenceable())
27074 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27075 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27076 // FIXME: Discards pointer and AA info.
27077 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27078 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27079 MMOFlags);
27080 } else {
27081 // FIXME: Discards pointer and AA info.
27082 Load = DAG.getExtLoad(
27083 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27084 : LLD->getExtensionType(),
27085 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27086 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27087 }
27088
27089 // Users of the select now use the result of the load.
27090 CombineTo(TheSelect, Load);
27091
27092 // Users of the old loads now use the new load's chain. We know the
27093 // old-load value is dead now.
27094 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27095 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27096 return true;
27097 }
27098
27099 return false;
27100}
27101
27102/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27103/// bitwise 'and'.
27104SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27105 SDValue N1, SDValue N2, SDValue N3,
27106 ISD::CondCode CC) {
27107 // If this is a select where the false operand is zero and the compare is a
27108 // check of the sign bit, see if we can perform the "gzip trick":
27109 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27110 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
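 // For example, with i32 X and the setlt form:
 //   select_cc setlt X, 0, A, 0 --> and (sra X, 31), A
 // because (sra X, 31) is all-ones exactly when X is negative and zero otherwise.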
27111 EVT XType = N0.getValueType();
27112 EVT AType = N2.getValueType();
27113 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27114 return SDValue();
27115
27116 // If the comparison is testing for a positive value, we have to invert
27117 // the sign bit mask, so only do that transform if the target has a bitwise
27118 // 'and not' instruction (the invert is free).
27119 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27120 // (X > -1) ? A : 0
27121 // (X > 0) ? X : 0 <-- This is canonical signed max.
27122 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27123 return SDValue();
27124 } else if (CC == ISD::SETLT) {
27125 // (X < 0) ? A : 0
27126 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27127 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27128 return SDValue();
27129 } else {
27130 return SDValue();
27131 }
27132
27133 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27134 // constant.
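 // For example, with i32 X and A == 8 (only bit 3 set): ShCt == 32 - 3 - 1 == 28,
 // so the setlt form becomes (and (srl X, 28), 8), which is 8 exactly when X is
 // negative.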
27135 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27136 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27137 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27138 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27139 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27140 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27141 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27142 AddToWorklist(Shift.getNode());
27143
27144 if (XType.bitsGT(AType)) {
27145 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27146 AddToWorklist(Shift.getNode());
27147 }
27148
27149 if (CC == ISD::SETGT)
27150 Shift = DAG.getNOT(DL, Shift, AType);
27151
27152 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27153 }
27154 }
27155
27156 unsigned ShCt = XType.getSizeInBits() - 1;
27157 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27158 return SDValue();
27159
27160 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27161 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27162 AddToWorklist(Shift.getNode());
27163
27164 if (XType.bitsGT(AType)) {
27165 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27166 AddToWorklist(Shift.getNode());
27167 }
27168
27169 if (CC == ISD::SETGT)
27170 Shift = DAG.getNOT(DL, Shift, AType);
27171
27172 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27173}
27174
27175// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27176SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27177 SDValue N0 = N->getOperand(0);
27178 SDValue N1 = N->getOperand(1);
27179 SDValue N2 = N->getOperand(2);
27180 SDLoc DL(N);
27181
27182 unsigned BinOpc = N1.getOpcode();
27183 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27184 (N1.getResNo() != N2.getResNo()))
27185 return SDValue();
27186
27187 // The use checks are intentionally on SDNode because we may be dealing
27188 // with opcodes that produce more than one SDValue.
27189 // TODO: Do we really need to check N0 (the condition operand of the select)?
27190 // But removing that clause could cause an infinite loop...
27191 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27192 return SDValue();
27193
27194 // Binops may include opcodes that return multiple values, so all values
27195 // must be created/propagated from the newly created binops below.
27196 SDVTList OpVTs = N1->getVTList();
27197
27198 // Fold select(cond, binop(x, y), binop(z, y))
27199 // --> binop(select(cond, x, z), y)
27200 if (N1.getOperand(1) == N2.getOperand(1)) {
27201 SDValue N10 = N1.getOperand(0);
27202 SDValue N20 = N2.getOperand(0);
27203 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27204 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27205 NewBinOp->setFlags(N1->getFlags());
27206 NewBinOp->intersectFlagsWith(N2->getFlags());
27207 return SDValue(NewBinOp.getNode(), N1.getResNo());
27208 }
27209
27210 // Fold select(cond, binop(x, y), binop(x, z))
27211 // --> binop(x, select(cond, y, z))
27212 if (N1.getOperand(0) == N2.getOperand(0)) {
27213 SDValue N11 = N1.getOperand(1);
27214 SDValue N21 = N2.getOperand(1);
27215 // Second op VT might be different (e.g. shift amount type)
27216 if (N11.getValueType() == N21.getValueType()) {
27217 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27218 SDValue NewBinOp =
27219 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27220 NewBinOp->setFlags(N1->getFlags());
27221 NewBinOp->intersectFlagsWith(N2->getFlags());
27222 return SDValue(NewBinOp.getNode(), N1.getResNo());
27223 }
27224 }
27225
27226 // TODO: Handle isCommutativeBinOp patterns as well?
27227 return SDValue();
27228}
27229
27230// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27231SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27232 SDValue N0 = N->getOperand(0);
27233 EVT VT = N->getValueType(0);
27234 bool IsFabs = N->getOpcode() == ISD::FABS;
27235 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27236
27237 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27238 return SDValue();
27239
27240 SDValue Int = N0.getOperand(0);
27241 EVT IntVT = Int.getValueType();
27242
27243 // The operand to cast should be integer.
27244 if (!IntVT.isInteger() || IntVT.isVector())
27245 return SDValue();
27246
27247 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27248 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
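 // For example, for an f32 value bitcast from i32, the sign mask is 0x80000000,
 // so fneg becomes (xor x, 0x80000000) and fabs becomes (and x, 0x7fffffff).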
27249 APInt SignMask;
27250 if (N0.getValueType().isVector()) {
27251 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27252 // 0x7f...) per element and splat it.
27253 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27254 if (IsFabs)
27255 SignMask = ~SignMask;
27256 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27257 } else {
27258 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27259 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27260 if (IsFabs)
27261 SignMask = ~SignMask;
27262 }
27263 SDLoc DL(N0);
27264 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27265 DAG.getConstant(SignMask, DL, IntVT));
27266 AddToWorklist(Int.getNode());
27267 return DAG.getBitcast(VT, Int);
27268}
27269
27270 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27271/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27272/// in it. This may be a win when the constant is not otherwise available
27273/// because it replaces two constant pool loads with one.
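/// In the code below the pool array is laid out as { FV, TV }, so a true
/// condition selects the offset of the second element (e.g. 4 bytes for f32)
/// and a false condition selects offset 0.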
27274SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27275 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27276 ISD::CondCode CC) {
27277 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27278 return SDValue();
27279
27280 // If we are before legalize types, we want the other legalization to happen
27281 // first (for example, to avoid messing with soft float).
27282 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27283 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27284 EVT VT = N2.getValueType();
27285 if (!TV || !FV || !TLI.isTypeLegal(VT))
27286 return SDValue();
27287
27288 // If a constant can be materialized without loads, this does not make sense.
27289 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27290 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27291 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27292 return SDValue();
27293
27294 // If both constants have multiple uses, then we won't need to do an extra
27295 // load. The values are likely around in registers for other users.
27296 if (!TV->hasOneUse() && !FV->hasOneUse())
27297 return SDValue();
27298
27299 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27300 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27301 Type *FPTy = Elts[0]->getType();
27302 const DataLayout &TD = DAG.getDataLayout();
27303
27304 // Create a ConstantArray of the two constants.
27305 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27306 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27307 TD.getPrefTypeAlign(FPTy));
27308 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27309
27310 // Get offsets to the 0 and 1 elements of the array, so we can select between
27311 // them.
27312 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27313 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27314 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27315 SDValue Cond =
27316 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27317 AddToWorklist(Cond.getNode());
27318 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27319 AddToWorklist(CstOffset.getNode());
27320 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27321 AddToWorklist(CPIdx.getNode());
27322 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27323 MachinePointerInfo::getConstantPool(
27324 DAG.getMachineFunction()), Alignment);
27325}
27326
27327/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27328/// where 'cond' is the comparison specified by CC.
27329SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27330 SDValue N2, SDValue N3, ISD::CondCode CC,
27331 bool NotExtCompare) {
27332 // (x ? y : y) -> y.
27333 if (N2 == N3) return N2;
27334
27335 EVT CmpOpVT = N0.getValueType();
27336 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27337 EVT VT = N2.getValueType();
27338 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27339 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27340 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27341
27342 // Determine if the condition we're dealing with is constant.
27343 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27344 AddToWorklist(SCC.getNode());
27345 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27346 // fold select_cc true, x, y -> x
27347 // fold select_cc false, x, y -> y
27348 return !(SCCC->isZero()) ? N2 : N3;
27349 }
27350 }
27351
27352 if (SDValue V =
27353 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27354 return V;
27355
27356 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27357 return V;
27358
27359 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27360 // where y has a single bit set.
27361 // A plaintext description would be: we can turn the SELECT_CC into an AND
27362 // when the condition can be materialized as an all-ones register. Any
27363 // single bit-test can be materialized as an all-ones register with
27364 // shift-left and shift-right-arith.
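 // For example, for ((x & 0x10) == 0) ? 0 : A with i32 x: shl by 27 moves bit 4
 // into the sign bit, sra by 31 then yields all-ones exactly when that bit was
 // set, and the final 'and' with A produces A or 0.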
27365 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27366 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27367 SDValue AndLHS = N0->getOperand(0);
27368 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27369 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27370 // Shift the tested bit over the sign bit.
27371 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27372 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27373 unsigned ShCt = AndMask.getBitWidth() - 1;
27374 SDValue ShlAmt =
27375 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27376 getShiftAmountTy(AndLHS.getValueType()));
27377 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27378
27379 // Now arithmetic right shift it all the way over, so the result is
27380 // either all-ones, or zero.
27381 SDValue ShrAmt =
27382 DAG.getConstant(ShCt, SDLoc(Shl),
27383 getShiftAmountTy(Shl.getValueType()));
27384 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27385
27386 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27387 }
27388 }
27389 }
27390
27391 // fold select C, 16, 0 -> shl C, 4
27392 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27393 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27394
27395 if ((Fold || Swap) &&
27396 TLI.getBooleanContents(CmpOpVT) ==
27397 TargetLowering::ZeroOrOneBooleanContent &&
27398 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27399
27400 if (Swap) {
27401 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27402 std::swap(N2C, N3C);
27403 }
27404
27405 // If the caller doesn't want us to simplify this into a zext of a compare,
27406 // don't do it.
27407 if (NotExtCompare && N2C->isOne())
27408 return SDValue();
27409
27410 SDValue Temp, SCC;
27411 // zext (setcc n0, n1)
27412 if (LegalTypes) {
27413 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27414 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27415 } else {
27416 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27417 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27418 }
27419
27420 AddToWorklist(SCC.getNode());
27421 AddToWorklist(Temp.getNode());
27422
27423 if (N2C->isOne())
27424 return Temp;
27425
27426 unsigned ShCt = N2C->getAPIntValue().logBase2();
27427 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27428 return SDValue();
27429
27430 // shl setcc result by log2 n2c
27431 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27432 DAG.getConstant(ShCt, SDLoc(Temp),
27433 getShiftAmountTy(Temp.getValueType())));
27434 }
27435
27436 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27437 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27438 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27439 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27440 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27441 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27442 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27443 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27444 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27445 SDValue ValueOnZero = N2;
27446 SDValue Count = N3;
27447 // If the condition is NE instead of EQ, swap the operands.
27448 if (CC == ISD::SETNE)
27449 std::swap(ValueOnZero, Count);
27450 // Check if the value on zero is a constant equal to the bits in the type.
27451 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27452 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27453 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27454 // legal, combine to just cttz.
27455 if ((Count.getOpcode() == ISD::CTTZ ||
27456 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27457 N0 == Count.getOperand(0) &&
27458 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27459 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27460 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27461 // legal, combine to just ctlz.
27462 if ((Count.getOpcode() == ISD::CTLZ ||
27463 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27464 N0 == Count.getOperand(0) &&
27465 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27466 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27467 }
27468 }
27469 }
27470
27471 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27472 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
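 // (ashr X, BW-1) is zero when X is non-negative and all-ones when X is
 // negative, so the xor leaves the constant unchanged for non-negative X and
 // complements it for negative X, matching both patterns above.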
27473 if (!NotExtCompare && N1C && N2C && N3C &&
27474 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27475 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27476 (N1C->isZero() && CC == ISD::SETLT)) &&
27477 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27478 SDValue ASR = DAG.getNode(
27479 ISD::SRA, DL, CmpOpVT, N0,
27480 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27481 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27482 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27483 }
27484
27485 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27486 return S;
27487 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27488 return S;
27489
27490 return SDValue();
27491}
27492
27493/// This is a stub for TargetLowering::SimplifySetCC.
27494SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27495 ISD::CondCode Cond, const SDLoc &DL,
27496 bool foldBooleans) {
27497 TargetLowering::DAGCombinerInfo
27498 DagCombineInfo(DAG, Level, false, this);
27499 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27500}
27501
27502/// Given an ISD::SDIV node expressing a divide by constant, return
27503/// a DAG expression to select that will generate the same value by multiplying
27504/// by a magic number.
27505/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
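/// For example, a signed divide by 7 is typically lowered to a MULHS by a
/// precomputed "magic" multiplier followed by a small fix-up sequence of
/// shifts and adds, avoiding a hardware divide.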
27506SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27507 // when optimising for minimum size, we don't want to expand a div to a mul
27508 // and a shift.
27509 if (DAG.getMachineFunction().getFunction().hasMinSize())
27510 return SDValue();
27511
27512 SmallVector<SDNode *, 8> Built;
27513 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27514 for (SDNode *N : Built)
27515 AddToWorklist(N);
27516 return S;
27517 }
27518
27519 return SDValue();
27520}
27521
27522/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27523/// DAG expression that will generate the same value by right shifting.
27524SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27525 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27526 if (!C)
27527 return SDValue();
27528
27529 // Avoid division by zero.
27530 if (C->isZero())
27531 return SDValue();
27532
27533 SmallVector<SDNode *, 8> Built;
27534 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27535 for (SDNode *N : Built)
27536 AddToWorklist(N);
27537 return S;
27538 }
27539
27540 return SDValue();
27541}
27542
27543/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27544/// expression that will generate the same value by multiplying by a magic
27545/// number.
27546/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27547SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27548 // when optimising for minimum size, we don't want to expand a div to a mul
27549 // and a shift.
27550 if (DAG.getMachineFunction().getFunction().hasMinSize())
27551 return SDValue();
27552
27553 SmallVector<SDNode *, 8> Built;
27554 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27555 for (SDNode *N : Built)
27556 AddToWorklist(N);
27557 return S;
27558 }
27559
27560 return SDValue();
27561}
27562
27563/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27564/// return a DAG expression that will generate the same value.
27565SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27566 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27567 if (!C)
27568 return SDValue();
27569
27570 // Avoid division by zero.
27571 if (C->isZero())
27572 return SDValue();
27573
27574 SmallVector<SDNode *, 8> Built;
27575 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27576 for (SDNode *N : Built)
27577 AddToWorklist(N);
27578 return S;
27579 }
27580
27581 return SDValue();
27582}
27583
27584// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27585//
27586// Returns the node that represents `Log2(Op)`. This may create a new node. If
27587 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27588//
27589// All nodes will be created at `DL` and the output will be of type `VT`.
27590//
27591// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27592 // `AssumeNonZero` if this function should simply assume (rather than require
27593 // proving) that `Op` is non-zero.
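// For example, Log2(1 << Y) folds to Y, and Log2(select C, 8, 32) folds to
// select C, 3, 5; both cases are handled below.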
27594 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27595 SDValue Op, unsigned Depth,
27596 bool AssumeNonZero) {
27597 assert(VT.isInteger() && "Only integer types are supported!");
27598
27599 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27600 while (true) {
27601 switch (V.getOpcode()) {
27602 case ISD::TRUNCATE:
27603 case ISD::ZERO_EXTEND:
27604 V = V.getOperand(0);
27605 break;
27606 default:
27607 return V;
27608 }
27609 }
27610 };
27611
27612 if (VT.isScalableVector())
27613 return SDValue();
27614
27615 Op = PeekThroughCastsAndTrunc(Op);
27616
27617 // Helper for determining whether a value is a power-2 constant scalar or a
27618 // vector of such elements.
27619 SmallVector<APInt> Pow2Constants;
27620 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27621 if (C->isZero() || C->isOpaque())
27622 return false;
27623 // TODO: We may also be able to support negative powers of 2 here.
27624 if (C->getAPIntValue().isPowerOf2()) {
27625 Pow2Constants.emplace_back(C->getAPIntValue());
27626 return true;
27627 }
27628 return false;
27629 };
27630
27631 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27632 if (!VT.isVector())
27633 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27634 // We need to create a build vector
27635 SmallVector<SDValue> Log2Ops;
27636 for (const APInt &Pow2 : Pow2Constants)
27637 Log2Ops.emplace_back(
27638 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27639 return DAG.getBuildVector(VT, DL, Log2Ops);
27640 }
27641
27642 if (Depth >= DAG.MaxRecursionDepth)
27643 return SDValue();
27644
27645 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27646 ToCast = PeekThroughCastsAndTrunc(ToCast);
27647 EVT CurVT = ToCast.getValueType();
27648 if (NewVT == CurVT)
27649 return ToCast;
27650
27651 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27652 return DAG.getBitcast(NewVT, ToCast);
27653
27654 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27655 };
27656
27657 // log2(X << Y) -> log2(X) + Y
27658 if (Op.getOpcode() == ISD::SHL) {
27659 // 1 << Y and X nuw/nsw << Y are both non-zero.
27660 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27661 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27662 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27663 Depth + 1, AssumeNonZero))
27664 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27665 CastToVT(VT, Op.getOperand(1)));
27666 }
27667
27668 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27669 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27670 Op.hasOneUse()) {
27671 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27672 Depth + 1, AssumeNonZero))
27673 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27674 Depth + 1, AssumeNonZero))
27675 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27676 }
27677
27678 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27679 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27680 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27681 Op.hasOneUse()) {
27682 // Use AssumeNonZero as false here. Otherwise we can hit a case where
27683 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27684 if (SDValue LogX =
27685 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27686 /*AssumeNonZero*/ false))
27687 if (SDValue LogY =
27688 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27689 /*AssumeNonZero*/ false))
27690 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27691 }
27692
27693 return SDValue();
27694}
27695
27696/// Determines the LogBase2 value for a non-null input value using the
27697/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
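/// For example, for an i32 value V == 16: ctlz(16) == 27, so LogBase2(V) ==
/// 31 - 27 == 4.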
27698SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27699 bool KnownNonZero, bool InexpensiveOnly,
27700 std::optional<EVT> OutVT) {
27701 EVT VT = OutVT ? *OutVT : V.getValueType();
27702 SDValue InexpensiveLogBase2 =
27703 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27704 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27705 return InexpensiveLogBase2;
27706
27707 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27708 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27709 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27710 return LogBase2;
27711}
27712
27713/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27714/// For the reciprocal, we need to find the zero of the function:
27715/// F(X) = 1/X - A [which has a zero at X = 1/A]
27716/// =>
27717/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27718/// does not require additional intermediate precision]
27719/// For the last iteration, put numerator N into it to gain more precision:
27720/// Result = N X_i + X_i (N - N A X_i)
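/// For example, approximating 1/3 from X_0 = 0.3:
///   X_1 = 0.3 * (2 - 3 * 0.3) = 0.33 and X_2 = 0.33 * (2 - 3 * 0.33) = 0.3333,
/// roughly doubling the number of correct digits each iteration.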
27721SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27722 SDNodeFlags Flags) {
27723 if (LegalDAG)
27724 return SDValue();
27725
27726 // TODO: Handle extended types?
27727 EVT VT = Op.getValueType();
27728 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27729 VT.getScalarType() != MVT::f64)
27730 return SDValue();
27731
27732 // If estimates are explicitly disabled for this function, we're done.
27733 MachineFunction &MF = DAG.getMachineFunction();
27734 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27735 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27736 return SDValue();
27737
27738 // Estimates may be explicitly enabled for this type with a custom number of
27739 // refinement steps.
27740 int Iterations = TLI.getDivRefinementSteps(VT, MF);
27741 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27742 AddToWorklist(Est.getNode());
27743
27744 SDLoc DL(Op);
27745 if (Iterations) {
27746 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27747
27748 // Newton iterations: Est = Est + Est (N - Arg * Est)
27749 // If this is the last iteration, also multiply by the numerator.
27750 for (int i = 0; i < Iterations; ++i) {
27751 SDValue MulEst = Est;
27752
27753 if (i == Iterations - 1) {
27754 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27755 AddToWorklist(MulEst.getNode());
27756 }
27757
27758 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27759 AddToWorklist(NewEst.getNode());
27760
27761 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27762 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27763 AddToWorklist(NewEst.getNode());
27764
27765 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27766 AddToWorklist(NewEst.getNode());
27767
27768 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27769 AddToWorklist(Est.getNode());
27770 }
27771 } else {
27772 // If no iterations are available, multiply with N.
27773 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27774 AddToWorklist(Est.getNode());
27775 }
27776
27777 return Est;
27778 }
27779
27780 return SDValue();
27781}
27782
27783/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27784/// For the reciprocal sqrt, we need to find the zero of the function:
27785/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27786/// =>
27787/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27788/// As a result, we precompute A/2 prior to the iteration loop.
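/// For example, approximating 1/sqrt(0.25) = 2 from X_0 = 1.8:
///   X_1 = 1.8 * (1.5 - 0.125 * 1.8 * 1.8) = 1.8 * 1.095 = 1.971.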
27789SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27790 unsigned Iterations,
27791 SDNodeFlags Flags, bool Reciprocal) {
27792 EVT VT = Arg.getValueType();
27793 SDLoc DL(Arg);
27794 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27795
27796 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27797 // this entire sequence requires only one FP constant.
27798 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27799 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27800
27801 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27802 for (unsigned i = 0; i < Iterations; ++i) {
27803 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27804 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27805 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27806 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27807 }
27808
27809 // If non-reciprocal square root is requested, multiply the result by Arg.
27810 if (!Reciprocal)
27811 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27812
27813 return Est;
27814}
27815
27816/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27817/// For the reciprocal sqrt, we need to find the zero of the function:
27818/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27819/// =>
27820/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
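/// This is algebraically the same step as the one-constant form, since
///   (-0.5 * X_i) * (A * X_i * X_i - 3.0) = X_i * (1.5 - 0.5 * A * X_i * X_i),
/// but it is phrased in terms of the two constants -0.5 and -3.0.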
27821SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27822 unsigned Iterations,
27823 SDNodeFlags Flags, bool Reciprocal) {
27824 EVT VT = Arg.getValueType();
27825 SDLoc DL(Arg);
27826 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27827 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27828
27829 // This routine must enter the loop below to work correctly
27830 // when (Reciprocal == false).
27831 assert(Iterations > 0);
27832
27833 // Newton iterations for reciprocal square root:
27834 // E = (E * -0.5) * ((A * E) * E + -3.0)
27835 for (unsigned i = 0; i < Iterations; ++i) {
27836 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27837 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27838 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27839
27840 // When calculating a square root at the last iteration build:
27841 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
27842 // (notice a common subexpression)
27843 SDValue LHS;
27844 if (Reciprocal || (i + 1) < Iterations) {
27845 // RSQRT: LHS = (E * -0.5)
27846 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27847 } else {
27848 // SQRT: LHS = (A * E) * -0.5
27849 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27850 }
27851
27852 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27853 }
27854
27855 return Est;
27856}
27857
27858/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27859/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27860/// Op can be zero.
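/// That is, sqrt(Op) is computed as Op * rsqrt(Op); the select emitted below
/// forces the result to the target-provided value (usually 0.0) when Op is
/// zero or denormal, where the product would otherwise be 0 * inf = NaN.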
27861SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27862 bool Reciprocal) {
27863 if (LegalDAG)
27864 return SDValue();
27865
27866 // TODO: Handle extended types?
27867 EVT VT = Op.getValueType();
27868 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27869 VT.getScalarType() != MVT::f64)
27870 return SDValue();
27871
27872 // If estimates are explicitly disabled for this function, we're done.
27873 MachineFunction &MF = DAG.getMachineFunction();
27874 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27875 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27876 return SDValue();
27877
27878 // Estimates may be explicitly enabled for this type with a custom number of
27879 // refinement steps.
27880 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27881
27882 bool UseOneConstNR = false;
27883 if (SDValue Est =
27884 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27885 Reciprocal)) {
27886 AddToWorklist(Est.getNode());
27887
27888 if (Iterations > 0)
27889 Est = UseOneConstNR
27890 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27891 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27892 if (!Reciprocal) {
27893 SDLoc DL(Op);
27894 // Try the target specific test first.
27895 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27896
27897 // The estimate is now completely wrong if the input was exactly 0.0 or
27898 // possibly a denormal. Force the answer to 0.0 or value provided by
27899 // target for those cases.
27900 Est = DAG.getNode(
27901 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27902 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27903 }
27904 return Est;
27905 }
27906
27907 return SDValue();
27908}
27909
27910SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27911 return buildSqrtEstimateImpl(Op, Flags, true);
27912}
27913
27914SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27915 return buildSqrtEstimateImpl(Op, Flags, false);
27916}
27917
27918/// Return true if there is any possibility that the two addresses overlap.
27919bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27920
27921 struct MemUseCharacteristics {
27922 bool IsVolatile;
27923 bool IsAtomic;
27924 SDValue BasePtr;
27925 int64_t Offset;
27926 LocationSize NumBytes;
27927 MachineMemOperand *MMO;
27928 };
27929
27930 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27931 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
27932 int64_t Offset = 0;
27933 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
27934 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
27935 : (LSN->getAddressingMode() == ISD::PRE_DEC)
27936 ? -1 * C->getSExtValue()
27937 : 0;
27938 TypeSize Size = LSN->getMemoryVT().getStoreSize();
27939 return {LSN->isVolatile(), LSN->isAtomic(),
27940 LSN->getBasePtr(), Offset /*base offset*/,
27941 LocationSize::precise(Size), LSN->getMemOperand()};
27942 }
27943 if (const auto *LN = cast<LifetimeSDNode>(N))
27944 return {false /*isVolatile*/,
27945 /*isAtomic*/ false,
27946 LN->getOperand(1),
27947 (LN->hasOffset()) ? LN->getOffset() : 0,
27948 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
27949 : LocationSize::beforeOrAfterPointer(),
27950 (MachineMemOperand *)nullptr};
27951 // Default.
27952 return {false /*isvolatile*/,
27953 /*isAtomic*/ false,
27954 SDValue(),
27955 (int64_t)0 /*offset*/,
27956 LocationSize::beforeOrAfterPointer() /*size*/,
27957 (MachineMemOperand *)nullptr};
27958 };
27959
27960 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
27961 MUC1 = getCharacteristics(Op1);
27962
27963 // If they are to the same address, then they must be aliases.
27964 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
27965 MUC0.Offset == MUC1.Offset)
27966 return true;
27967
27968 // If they are both volatile then they cannot be reordered.
27969 if (MUC0.IsVolatile && MUC1.IsVolatile)
27970 return true;
27971
27972 // Be conservative about atomics for the moment
27973 // TODO: This is way overconservative for unordered atomics (see D66309)
27974 if (MUC0.IsAtomic && MUC1.IsAtomic)
27975 return true;
27976
27977 if (MUC0.MMO && MUC1.MMO) {
27978 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27979 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27980 return false;
27981 }
27982
27983 // If NumBytes is scalable and offset is not 0, conservatively return may
27984 // alias
27985 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
27986 MUC0.Offset != 0) ||
27987 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
27988 MUC1.Offset != 0))
27989 return true;
27990 // Try to prove that there is aliasing, or that there is no aliasing. Either
27991 // way, we can return now. If nothing can be proved, proceed with more tests.
27992 bool IsAlias;
27993 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
27994 DAG, IsAlias))
27995 return IsAlias;
27996
27997 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
27998 // either are not known.
27999 if (!MUC0.MMO || !MUC1.MMO)
28000 return true;
28001
28002 // If one operation reads from invariant memory, and the other may store, they
28003 // cannot alias. These should really be checking the equivalent of mayWrite,
28004 // but it only matters for memory nodes other than load/store.
28005 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28006 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28007 return false;
28008
28009 // If we know required SrcValue1 and SrcValue2 have relatively large
28010 // alignment compared to the size and offset of the access, we may be able
28011 // to prove they do not alias. This check is conservative for now to catch
28012 // cases created by splitting vector types; it only works when the offsets are
28013 // multiples of the size of the data.
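 // For example, two 4-byte accesses with base alignment 16 at offsets 4 and 8
 // occupy [4,8) and [8,12) within any 16-byte-aligned region, so they cannot
 // overlap.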
28014 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28015 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28016 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28017 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28018 LocationSize Size0 = MUC0.NumBytes;
28019 LocationSize Size1 = MUC1.NumBytes;
28020
28021 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28022 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28023 !Size1.isScalable() && Size0 == Size1 &&
28024 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28025 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28026 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28027 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28028 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28029
28030 // There is no overlap between these relatively aligned accesses of
28031 // similar size. Return no alias.
28032 if ((OffAlign0 + static_cast<int64_t>(
28033 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28034 (OffAlign1 + static_cast<int64_t>(
28035 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28036 return false;
28037 }
28038
28039 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28040 ? CombinerGlobalAA
28041 : DAG.getSubtarget().useAA();
28042#ifndef NDEBUG
28043 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28044 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28045 UseAA = false;
28046#endif
28047
28048 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28049 Size0.hasValue() && Size1.hasValue()) {
28050 // Use alias analysis information.
28051 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28052 int64_t Overlap0 =
28053 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28054 int64_t Overlap1 =
28055 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28056 LocationSize Loc0 =
28057 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28058 LocationSize Loc1 =
28059 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28060 if (AA->isNoAlias(
28061 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28062 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28063 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28064 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28065 return false;
28066 }
28067
28068 // Otherwise we have to assume they alias.
28069 return true;
28070}
28071
28072/// Walk up chain skipping non-aliasing memory nodes,
28073/// looking for aliasing nodes and adding them to the Aliases vector.
28074void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28075 SmallVectorImpl<SDValue> &Aliases) {
28076 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28077 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28078
28079 // Get alias information for node.
28080 // TODO: relax aliasing for unordered atomics (see D66309)
28081 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28082
28083 // Starting off.
28084 Chains.push_back(OriginalChain);
28085 unsigned Depth = 0;
28086
28087 // Attempt to improve chain by a single step
28088 auto ImproveChain = [&](SDValue &C) -> bool {
28089 switch (C.getOpcode()) {
28090 case ISD::EntryToken:
28091 // No need to mark EntryToken.
28092 C = SDValue();
28093 return true;
28094 case ISD::LOAD:
28095 case ISD::STORE: {
28096 // Get alias information for C.
28097 // TODO: Relax aliasing for unordered atomics (see D66309)
28098 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28099 cast<LSBaseSDNode>(C.getNode())->isSimple();
28100 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28101 // Look further up the chain.
28102 C = C.getOperand(0);
28103 return true;
28104 }
28105 // Alias, so stop here.
28106 return false;
28107 }
28108
28109 case ISD::CopyFromReg:
28110 // Always forward past CopyFromReg.
28111 C = C.getOperand(0);
28112 return true;
28113
28114 case ISD::LIFETIME_START:
28115 case ISD::LIFETIME_END: {
28116 // We can forward past any lifetime start/end that can be proven not to
28117 // alias the memory access.
28118 if (!mayAlias(N, C.getNode())) {
28119 // Look further up the chain.
28120 C = C.getOperand(0);
28121 return true;
28122 }
28123 return false;
28124 }
28125 default:
28126 return false;
28127 }
28128 };
28129
28130 // Look at each chain and determine if it is an alias. If so, add it to the
28131 // aliases list. If not, then continue up the chain looking for the next
28132 // candidate.
28133 while (!Chains.empty()) {
28134 SDValue Chain = Chains.pop_back_val();
28135
28136 // Don't bother if we've seen Chain before.
28137 if (!Visited.insert(Chain.getNode()).second)
28138 continue;
28139
28140 // For TokenFactor nodes, look at each operand and only continue up the
28141 // chain until we reach the depth limit.
28142 //
28143 // FIXME: The depth check could be made to return the last non-aliasing
28144 // chain we found before we hit a tokenfactor rather than the original
28145 // chain.
28146 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28147 Aliases.clear();
28148 Aliases.push_back(OriginalChain);
28149 return;
28150 }
28151
28152 if (Chain.getOpcode() == ISD::TokenFactor) {
28153 // We have to check each of the operands of the token factor for "small"
28154 // token factors, so we queue them up. Adding the operands to the queue
28155 // (stack) in reverse order maintains the original order and increases the
28156 // likelihood that getNode will find a matching token factor (CSE.)
28157 if (Chain.getNumOperands() > 16) {
28158 Aliases.push_back(Chain);
28159 continue;
28160 }
28161 for (unsigned n = Chain.getNumOperands(); n;)
28162 Chains.push_back(Chain.getOperand(--n));
28163 ++Depth;
28164 continue;
28165 }
28166 // Everything else
28167 if (ImproveChain(Chain)) {
28168 // Updated Chain Found, Consider new chain if one exists.
28169 if (Chain.getNode())
28170 Chains.push_back(Chain);
28171 ++Depth;
28172 continue;
28173 }
28174 // No Improved Chain Possible, treat as Alias.
28175 Aliases.push_back(Chain);
28176 }
28177}
28178
28179/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28180/// (aliasing node.)
28181SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28182 if (OptLevel == CodeGenOptLevel::None)
28183 return OldChain;
28184
28185 // Ops for replacing token factor.
28186 SmallVector<SDValue, 8> Aliases;
28187
28188 // Accumulate all the aliases to this node.
28189 GatherAllAliases(N, OldChain, Aliases);
28190
28191 // If no operands then chain to entry token.
28192 if (Aliases.empty())
28193 return DAG.getEntryNode();
28194
28195 // If a single operand then chain to it. We don't need to revisit it.
28196 if (Aliases.size() == 1)
28197 return Aliases[0];
28198
28199 // Construct a custom tailored token factor.
28200 return DAG.getTokenFactor(SDLoc(N), Aliases);
28201}
28202
28203// This function tries to collect a bunch of potentially interesting
28204// nodes to improve the chains of, all at once. This might seem
28205// redundant, as this function gets called when visiting every store
28206// node, so why not let the work be done on each store as it's visited?
28207//
28208// I believe this is mainly important because mergeConsecutiveStores
28209// is unable to deal with merging stores of different sizes, so unless
28210// we improve the chains of all the potential candidates up-front
28211// before running mergeConsecutiveStores, it might only see some of
28212// the nodes that will eventually be candidates, and then not be able
28213// to go from a partially-merged state to the desired final
28214// fully-merged state.
28215
28216bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28217 SmallVector<StoreSDNode *, 8> ChainedStores;
28218 StoreSDNode *STChain = St;
28219 // Intervals records which offsets from BaseIndex have been covered. In
28220 // the common case, each store covers the range immediately adjacent to the
28221 // previous one and is thus merged with that interval at insertion time.
28222
28223 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28224 IntervalMapHalfOpenInfo<int64_t>>;
28225 IMap::Allocator A;
28226 IMap Intervals(A);
28227
28228 // This holds the base pointer, index, and the offset in bytes from the base
28229 // pointer.
28230 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28231
28232 // We must have a base and an offset.
28233 if (!BasePtr.getBase().getNode())
28234 return false;
28235
28236 // Do not handle stores to undef base pointers.
28237 if (BasePtr.getBase().isUndef())
28238 return false;
28239
28240 // Do not handle stores to opaque types
28241 if (St->getMemoryVT().isZeroSized())
28242 return false;
28243
28244 // BaseIndexOffset assumes that offsets are fixed-size, which
28245 // is not valid for scalable vectors where the offsets are
28246 // scaled by `vscale`, so bail out early.
28247 if (St->getMemoryVT().isScalableVT())
28248 return false;
28249
28250 // Add ST's interval.
28251 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28252 std::monostate{});
28253
28254 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28255 if (Chain->getMemoryVT().isScalableVector())
28256 return false;
28257
28258 // If the chain has more than one use, then we can't reorder the mem ops.
28259 if (!SDValue(Chain, 0)->hasOneUse())
28260 break;
28261 // TODO: Relax for unordered atomics (see D66309)
28262 if (!Chain->isSimple() || Chain->isIndexed())
28263 break;
28264
28265 // Find the base pointer and offset for this memory node.
28266 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28267 // Check that the base pointer is the same as the original one.
28268 int64_t Offset;
28269 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28270 break;
28271 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28272 // Make sure we don't overlap with other intervals by checking the ones to
28273 // the left or right before inserting.
28274 auto I = Intervals.find(Offset);
28275 // If there's a next interval, we should end before it.
28276 if (I != Intervals.end() && I.start() < (Offset + Length))
28277 break;
28278 // If there's a previous interval, we should start after it.
28279 if (I != Intervals.begin() && (--I).stop() <= Offset)
28280 break;
28281 Intervals.insert(Offset, Offset + Length, std::monostate{});
28282
28283 ChainedStores.push_back(Chain);
28284 STChain = Chain;
28285 }
28286
28287 // If we didn't find a chained store, exit.
28288 if (ChainedStores.empty())
28289 return false;
28290
28291 // Improve all chained stores (St and ChainedStores members) starting from
28292 // where the store chain ended and return single TokenFactor.
28293 SDValue NewChain = STChain->getChain();
28294 SmallVector<SDValue, 8> TFOps;
28295 for (unsigned I = ChainedStores.size(); I;) {
28296 StoreSDNode *S = ChainedStores[--I];
28297 SDValue BetterChain = FindBetterChain(S, NewChain);
28298 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28299 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28300 TFOps.push_back(SDValue(S, 0));
28301 ChainedStores[I] = S;
28302 }
28303
28304 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28305 SDValue BetterChain = FindBetterChain(St, NewChain);
28306 SDValue NewST;
28307 if (St->isTruncatingStore())
28308 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28309 St->getBasePtr(), St->getMemoryVT(),
28310 St->getMemOperand());
28311 else
28312 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28313 St->getBasePtr(), St->getMemOperand());
28314
28315 TFOps.push_back(NewST);
28316
28317 // If we improved every element of TFOps, then we've lost the dependence on
28318 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28319 // the beginning to keep relative order consistent with FindBetterChains.
28320 auto hasImprovedChain = [&](SDValue ST) -> bool {
28321 return ST->getOperand(0) != NewChain;
28322 };
28323 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28324 if (AddNewChain)
28325 TFOps.insert(TFOps.begin(), NewChain);
28326
28327 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28328 CombineTo(St, TF);
28329
28330 // Add TF and its operands to the worklist.
28331 AddToWorklist(TF.getNode());
28332 for (const SDValue &Op : TF->ops())
28333 AddToWorklist(Op.getNode());
28334 AddToWorklist(STChain);
28335 return true;
28336}
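//===--------------------------------------------------------------------===//
// Editorial sketch (not part of DAGCombiner.cpp): the coverage bookkeeping in
// parallelizeChainedStores above keys an llvm::IntervalMap on byte offsets
// from the common base pointer and bails out on any overlap. The helper below
// is a standalone, simplified version of that overlap test; the name
// recordIfDisjoint and the surrounding namespace are illustrative only, and
// the sketch relies solely on the insert(), find(), start() and stop() parts
// of the IntervalMap API.
#include "llvm/ADT/IntervalMap.h"
#include <cstdint>
#include <variant>

namespace sketch {

using ByteMap = llvm::IntervalMap<int64_t, std::monostate, 8,
                                  llvm::IntervalMapHalfOpenInfo<int64_t>>;

// Record the half-open byte range [Offset, Offset + Length) if it does not
// overlap anything already recorded; otherwise record nothing and return
// false. Length is assumed to be positive.
inline bool recordIfDisjoint(ByteMap &Covered, int64_t Offset, int64_t Length) {
  // find() returns the first interval ending at or after Offset; because the
  // recorded intervals are disjoint and sorted, it is the only candidate that
  // could overlap [Offset, Offset + Length).
  auto I = Covered.find(Offset);
  if (I != Covered.end() && I.start() < Offset + Length)
    return false;
  Covered.insert(Offset, Offset + Length, std::monostate{});
  return true;
}

} // namespace sketch

// Possible usage (the allocator must outlive the map):
//   sketch::ByteMap::Allocator A;
//   sketch::ByteMap Covered(A);
//   sketch::recordIfDisjoint(Covered, 0, 4);  // true:  covers [0, 4)
//   sketch::recordIfDisjoint(Covered, 2, 4);  // false: overlaps [0, 4)
//   sketch::recordIfDisjoint(Covered, 4, 4);  // true:  [4, 8) is adjacent
//===--------------------------------------------------------------------===//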
28337
28338bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28339 if (OptLevel == CodeGenOptLevel::None)
28340 return false;
28341
28342 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28343
28344 // We must have a base and an offset.
28345 if (!BasePtr.getBase().getNode())
28346 return false;
28347
28348 // Do not handle stores to undef base pointers.
28349 if (BasePtr.getBase().isUndef())
28350 return false;
28351
28352 // Directly improve a chain of disjoint stores starting at St.
28353 if (parallelizeChainedStores(St))
28354 return true;
28355
28356 // Improve St's chain.
28357 SDValue BetterChain = FindBetterChain(St, St->getChain());
28358 if (St->getChain() != BetterChain) {
28359 replaceStoreChain(St, BetterChain);
28360 return true;
28361 }
28362 return false;
28363}
28364
28365/// This is the entry point for the file.
28366 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28367 CodeGenOptLevel OptLevel) {
28368 /// This is the main entry point to this class.
28369 DAGCombiner(*this, AA, OptLevel).Run(Level);
28370}
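//===--------------------------------------------------------------------===//
// Editorial note (not part of DAGCombiner.cpp): SelectionDAG::Combine above
// is the hook the instruction selection pipeline calls once per combine
// phase. A hedged sketch of how a driver might invoke it; the variable names
// CurDAG, AA and OptLevel are assumptions for illustration, not quoted from
// SelectionDAGISel:
//
//   CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
//   // ...type legalization...
//   CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
//   // ...vector-op and DAG legalization...
//   CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
//   CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
//
// Each call constructs a fresh DAGCombiner and runs it over the whole DAG at
// the given CombineLevel.
//===--------------------------------------------------------------------===//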
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition: APFloat.h:1295
bool isNormal() const
Definition: APFloat.h:1299
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1278
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isLargest() const
Definition: APFloat.h:1311
bool isIEEE() const
Definition: APFloat.h:1313
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:444
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
int32_t exactLogBase2() const
Definition: APInt.h:1725
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
unsigned logBase2() const
Definition: APInt.h:1703
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:307
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:360
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:953
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:550
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:473
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool isADDLike(SDValue Op) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:477
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:447
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:827
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:471
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:658
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:861
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:472
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:542
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:478
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:468
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:844
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
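A hedged sketch of the classic use of this query, assuming N0 and N1 are the operands of an ISD::AND being visited: the AND is a no-op when every bit its mask clears is already known to be zero in N0.
if (ConstantSDNode *MaskC = isConstOrConstSplat(N1)) {
  APInt NotMask = ~MaskC->getAPIntValue();
  if (DAG.MaskedValueIsZero(N0, NotMask))
    return N0; // the AND cannot change any bit of N0
}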
LLVMContext * getContext() const
Definition: SelectionDAG.h:484
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:559
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:553
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:877
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:907
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
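A self-contained sketch of the insert/pop_back_val worklist discipline these members provide (duplicates are ignored, iteration order is deterministic):
#include "llvm/ADT/SetVector.h"
#include <cstdio>

int main() {
  llvm::SetVector<int> Worklist;
  Worklist.insert(3);
  Worklist.insert(7);
  Worklist.insert(3); // duplicate: insert() returns false, nothing is re-added
  while (!Worklist.empty())
    std::printf("%d\n", Worklist.pop_back_val()); // prints 7 then 3 (LIFO)
}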
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
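A hedged sketch of how a store combine typically classifies the node before rewriting it, assuming N is the ISD::STORE node being visited:
auto *ST = cast<StoreSDNode>(N);
SDValue StoredVal = ST->getValue();   // the value operand
SDValue Ptr       = ST->getBasePtr(); // the address operand
if (ST->isTruncatingStore()) {
  // StoredVal is narrowed to ST->getMemoryVT() before reaching memory,
  // so any rewrite must preserve that truncation.
}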
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
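A minimal sketch of the legality guard most combines apply before creating new nodes, assuming TLI, LegalOperations, and the visited node N are in scope; ISD::FMINNUM stands in for whichever opcode the fold would build:
EVT VT = N->getValueType(0);
if (!TLI.isTypeLegal(VT))
  return SDValue();
if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
  return SDValue();
// safe to build an ISD::FMINNUM of type VT here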
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Returns whether it's reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
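A hedged sketch, assuming N0, TLI, and DAG are in scope and N0 is a scalar value at least 8 bits wide: when only the low 8 bits of N0 are demanded, ask for a simpler value that agrees on those bits.
unsigned BW = N0.getScalarValueSizeInBits();
APInt Demanded = APInt::getLowBitsSet(BW, 8); // only the low byte matters
APInt DemandedElts(1, 1);                     // scalar: a single "element"
if (SDValue Simplified =
        TLI.SimplifyMultipleUseDemandedBits(N0, Demanded, DemandedElts, DAG))
  N0 = Simplified;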
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method query the target whether it is beneficial for dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:217
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:239
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2178
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2183
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2188
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2193
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1376
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:367
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:487
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:985
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1037
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:373
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1361
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1365
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1375
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1406
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:662
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1358
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1362
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:758
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1083
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:646
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1243
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1377
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1370
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:984
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1335
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:359
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1310
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1197
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1378
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:922
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:990
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1013
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:260
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:657
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1359
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:944
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1366
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1076
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1146
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1018
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:313
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1495
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1605
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
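A hedged sketch: before a divide-by-constant style rewrite, require that N1 (assumed in scope) is a constant, or a constant build vector, with every element nonzero.
auto IsNonZero = [](ConstantSDNode *C) { return !C->isZero(); };
if (ISD::matchUnaryPredicate(N1, IsNonZero)) {
  // every lane of N1 is a known nonzero constant
}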
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1580
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1600
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1421
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1491
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1491
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1562
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1478
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1509
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1574
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:919
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:822
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:542
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:840
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(Preds &&...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
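These SDPatternMatch helpers let shape checks be written declaratively; a minimal sketch, assuming the SDPatternMatch m_Value(SDValue &) binder and that N and DAG are in scope, that matches either form of right shift:
SDValue Src, Amt;
if (sd_match(N, &DAG, m_AnyOf(m_Srl(m_Value(Src), m_Value(Amt)),
                              m_Sra(m_Value(Src), m_Value(Amt))))) {
  // Src and Amt are the operands of whichever shift matched
}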
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:326
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:228
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1525
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
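A hedged sketch, assuming N1 is in scope: constants frequently sit behind bitcasts, so strip them before asking whether an operand is the all-zero vector.
SDValue Peeked = peekThroughBitcasts(N1);
if (ISD::isBuildVectorAllZeros(Peeked.getNode())) {
  // N1 is a (possibly bitcast) all-zero build vector
}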
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:899
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1475
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
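
Several of the bit-manipulation helpers indexed here (popcount, countr_zero, countl_zero, Log2_32, isPowerOf2_32, PowerOf2Ceil) come from llvm/ADT/bit.h and llvm/Support/MathExtras.h. A minimal standalone sketch with an arbitrary example value:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdio>

int main() {
  uint32_t V = 0x00F0; // 0b0000'0000'1111'0000

  std::printf("popcount     = %d\n", llvm::popcount(V));       // 4
  std::printf("countr_zero  = %d\n", llvm::countr_zero(V));    // 4
  std::printf("countl_zero  = %d\n", llvm::countl_zero(V));    // 24
  std::printf("Log2_32      = %u\n", llvm::Log2_32(V));        // 7
  std::printf("isPowerOf2   = %d\n", llvm::isPowerOf2_32(V));  // 0
  std::printf("PowerOf2Ceil = %llu\n",
              (unsigned long long)llvm::PowerOf2Ceil(V));      // 256
  return 0;
}
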
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
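
DAG combines typically query constant and splat predicates like the ones indexed here through their SDValue overloads declared in SelectionDAGNodes.h. The fragment below is a hypothetical helper, not code from this file: FoldTrivialBinOp is an invented name, and the snippet only compiles when built against LLVM's CodeGen headers.

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Return X when (Opc X, C) is a no-op because C is the identity constant
// (as a scalar or splat); otherwise return an empty SDValue.
static SDValue FoldTrivialBinOp(unsigned Opc, SDValue X, SDValue C) {
  switch (Opc) {
  case ISD::ADD:
  case ISD::OR:
  case ISD::XOR:
    if (isNullOrNullSplat(C)) // x + 0, x | 0, x ^ 0 --> x
      return X;
    break;
  case ISD::AND:
    if (isAllOnesOrAllOnesSplat(C)) // x & -1 --> x
      return X;
    break;
  case ISD::MUL:
    if (isOneOrOneSplat(C)) // x * 1 --> x
      return X;
    break;
  }
  return SDValue();
}
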
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
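
The shuffle-mask utilities indexed here (widenShuffleMaskElts, narrowShuffleMaskElts, getSplatIndex) live in llvm/Analysis/VectorUtils.h. A minimal sketch with made-up masks, assuming it is linked against LLVM:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  // A mask selecting pairs of adjacent elements is equivalent to a mask of
  // half the length over elements twice as wide.
  llvm::SmallVector<int, 8> Mask = {0, 1, 4, 5};
  llvm::SmallVector<int, 8> Wide;
  if (llvm::widenShuffleMaskElts(/*Scale=*/2, Mask, Wide))
    std::printf("widened: %d %d\n", Wide[0], Wide[1]); // 0 2

  // Going back the other way: {0, 2} over wide elements becomes
  // {0, 1, 4, 5} over elements half as wide.
  llvm::SmallVector<int, 8> Narrow;
  llvm::narrowShuffleMaskElts(/*Scale=*/2, Wide, Narrow);
  std::printf("narrowed size: %zu\n", Narrow.size()); // 4

  // getSplatIndex returns the common index if all non-negative mask
  // elements agree, otherwise -1.
  llvm::SmallVector<int, 4> Splat = {3, 3, -1, 3};
  std::printf("splat index: %d\n", llvm::getSplatIndex(Splat)); // 3
  return 0;
}
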
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
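
The fltSemantics accessors above can be queried directly on one of APFloat's predefined formats. A minimal sketch for IEEE single precision; the expected values in the comments follow from the IEEE-754 binary32 format:

#include "llvm/ADT/APFloat.h"
#include <cstdio>

int main() {
  const llvm::fltSemantics &Sem = llvm::APFloat::IEEEsingle();

  std::printf("precision   = %u\n", llvm::APFloat::semanticsPrecision(Sem));   // 24
  std::printf("maxExponent = %d\n", llvm::APFloat::semanticsMaxExponent(Sem)); // 127
  std::printf("minExponent = %d\n", llvm::APFloat::semanticsMinExponent(Sem)); // -126
  return 0;
}
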
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
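
Align and the free functions indexed earlier (Log2, isAligned, commonAlignment) come from llvm/Support/Alignment.h. A minimal sketch with an arbitrary 16-byte alignment:

#include "llvm/Support/Alignment.h"
#include <cstdio>

int main() {
  llvm::Align A(16);

  std::printf("value           = %llu\n", (unsigned long long)A.value()); // 16
  std::printf("Log2            = %u\n", llvm::Log2(A));                   // 4
  std::printf("isAligned(32)   = %d\n", llvm::isAligned(A, 32));          // 1
  std::printf("isAligned(24)   = %d\n", llvm::isAligned(A, 24));          // 0

  // commonAlignment returns the alignment satisfied by both A and the
  // byte offset; for 16-byte alignment and offset 8 this is 8.
  llvm::Align C = llvm::commonAlignment(A, 8);
  std::printf("commonAlignment = %llu\n", (unsigned long long)C.value()); // 8
  return 0;
}
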
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector's element count is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
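
Many of the EVT queries above can be exercised outside a SelectionDAG by constructing EVTs directly from an LLVMContext. A minimal sketch; the values in the comments assume a 4 x i32 vector:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cstdio>

int main() {
  llvm::LLVMContext Ctx;

  // A fixed-length vector of 4 x i32.
  llvm::EVT VT = llvm::EVT::getVectorVT(
      Ctx, llvm::EVT::getIntegerVT(Ctx, 32), 4);

  std::printf("isVector       = %d\n", VT.isVector());              // 1
  std::printf("numElements    = %u\n", VT.getVectorNumElements());  // 4
  std::printf("scalarSizeBits = %llu\n",
              (unsigned long long)VT.getScalarSizeInBits());        // 32
  std::printf("sizeInBits     = %llu\n",
              (unsigned long long)VT.getSizeInBits().getFixedValue()); // 128
  std::printf("isInteger      = %d\n", VT.isInteger());             // 1

  // changeTypeToInteger on an FP vector gives an integer vector of the
  // same width (e.g. 4 x f32 -> 4 x i32).
  llvm::EVT FVT = llvm::EVT::getVectorVT(
      Ctx, llvm::EVT::getFloatingPointVT(32), 4);
  std::printf("changedIsInt   = %d\n", FVT.changeTypeToInteger().isInteger()); // 1
  return 0;
}
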
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
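
KnownBits can likewise be built by hand from Zero/One masks to see what the queries above report. A minimal sketch with an invented 8-bit pattern:

#include "llvm/Support/KnownBits.h"
#include <cstdio>

int main() {
  // 8-bit value of the form 0b000001?0: bit 2 is known one, bit 1 is
  // unknown, and all other bits are known zero.
  llvm::KnownBits Known(8);
  Known.One = llvm::APInt(8, 0x04);  // bits known to be one
  Known.Zero = llvm::APInt(8, 0xF9); // bits known to be zero

  std::printf("isConstant            = %d\n", Known.isConstant());            // 0
  std::printf("isNonNegative         = %d\n", Known.isNonNegative());         // 1
  std::printf("countMinTrailingZeros = %u\n", Known.countMinTrailingZeros()); // 1
  std::printf("countMinLeadingZeros  = %u\n", Known.countMinLeadingZeros());  // 5
  std::printf("countMaxActiveBits    = %u\n", Known.countMaxActiveBits());    // 3
  return 0;
}
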
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:306
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...