LLVM 20.0.0git
RISCVTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/ADT/STLExtras.h"
18#include <cmath>
19#include <optional>
20using namespace llvm;
21using namespace llvm::PatternMatch;
22
23#define DEBUG_TYPE "riscvtti"
24
// Command-line option name and help text controlling the LMUL assumed by
// getRegisterBitWidth queries.
// NOTE(review): the surrounding option declaration is not visible in this
// listing - presumably the cl::opt behind RVVRegisterWidthLMUL; confirm.
26 "riscv-v-register-bit-width-lmul",
28 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
29 "by autovectorized code. Fractional LMULs are not supported."),
31
// Command-line option name and help text overriding the getMaximumVF result,
// which (per the help string) is consumed only by the SLP vectorizer.
// NOTE(review): the option declaration itself is not visible here - confirm.
33 "riscv-v-slp-max-vf",
35 "Overrides result used for getMaximumVF query which is used "
36 "exclusively by SLP vectorizer."),
38
// Estimates the cost of the RVV instruction sequence OpCodes operating on VT
// by accumulating a per-opcode cost: gathers and slides defer to TLI helpers,
// reductions depend on the (estimated) vector length, scalar moves and mask
// operations count as 1, and every other opcode is charged the LMUL cost of
// VT.
// NOTE(review): several lines of this definition are not visible in this
// listing (return type, the remaining parameter(s), the bodies/conditions of
// the early returns and the declaration of Cost) - confirm against the full
// source before relying on the early-return behaviour.
40RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
42 // Check if the type is valid for all CostKind
43 if (!VT.isVector())
45 size_t NumInstr = OpCodes.size();
47 return NumInstr;
48 InstructionCost LMULCost = TLI->getLMULCost(VT);
50 return LMULCost * NumInstr;
52 for (auto Op : OpCodes) {
53 switch (Op) {
54 case RISCV::VRGATHER_VI:
55 Cost += TLI->getVRGatherVICost(VT);
56 break;
57 case RISCV::VRGATHER_VV:
58 Cost += TLI->getVRGatherVVCost(VT);
59 break;
60 case RISCV::VSLIDEUP_VI:
61 case RISCV::VSLIDEDOWN_VI:
62 Cost += TLI->getVSlideVICost(VT);
63 break;
64 case RISCV::VSLIDEUP_VX:
65 case RISCV::VSLIDEDOWN_VX:
66 Cost += TLI->getVSlideVXCost(VT);
67 break;
68 case RISCV::VREDMAX_VS:
69 case RISCV::VREDMIN_VS:
70 case RISCV::VREDMAXU_VS:
71 case RISCV::VREDMINU_VS:
72 case RISCV::VREDSUM_VS:
73 case RISCV::VREDAND_VS:
74 case RISCV::VREDOR_VS:
75 case RISCV::VREDXOR_VS:
76 case RISCV::VFREDMAX_VS:
77 case RISCV::VFREDMIN_VS:
78 case RISCV::VFREDUSUM_VS: {
// These reductions are charged ceil(log2(VL)). For non-fixed-length types VL
// is estimated by scaling the minimum element count by the tuning vscale.
79 unsigned VL = VT.getVectorMinNumElements();
80 if (!VT.isFixedLengthVector())
81 VL *= *getVScaleForTuning();
82 Cost += Log2_32_Ceil(VL);
83 break;
84 }
85 case RISCV::VFREDOSUM_VS: {
// This reduction is charged linearly in the (estimated) VL.
86 unsigned VL = VT.getVectorMinNumElements();
87 if (!VT.isFixedLengthVector())
88 VL *= *getVScaleForTuning();
89 Cost += VL;
90 break;
91 }
92 case RISCV::VMV_X_S:
93 case RISCV::VMV_S_X:
94 case RISCV::VFMV_F_S:
95 case RISCV::VFMV_S_F:
96 case RISCV::VMOR_MM:
97 case RISCV::VMXOR_MM:
98 case RISCV::VMAND_MM:
99 case RISCV::VMANDN_MM:
100 case RISCV::VMNAND_MM:
101 case RISCV::VCPOP_M:
102 case RISCV::VFIRST_M:
// Scalar<->vector moves and mask operations are a flat cost of 1,
// independent of LMUL.
103 Cost += 1;
104 break;
105 default:
// Any opcode without a dedicated model is charged the LMUL cost of VT.
106 Cost += LMULCost;
107 }
108 }
109 return Cost;
110}
111
// Cost of materialising the integer immediate Imm of type Ty: zero is free,
// anything else is whatever RISCVMatInt::getIntMatCost reports for the
// immediate at Ty's bit width (compression cost disabled, FreeZeroes
// forwarded).
// NOTE(review): the line(s) carrying the function name, return type and some
// parameters are not visible in this listing; callers in this file invoke
// getIntImmCostImpl(DL, ST, Imm, Ty, CostKind, FreeZeroes) - confirm.
113 const RISCVSubtarget *ST,
114 const APInt &Imm, Type *Ty,
116 bool FreeZeroes) {
117 assert(Ty->isIntegerTy() &&
118 "getIntImmCost can only estimate cost of materialising integers");
119
120 // We have a Zero register, so 0 is always free.
121 if (Imm == 0)
122 return TTI::TCC_Free;
123
124 // Otherwise, we check how many instructions it will take to materialise.
125 return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty), *ST,
126 /*CompressionCost=*/false, FreeZeroes);
127}
128
// Forwards to getIntImmCostImpl using this object's data layout and
// subtarget, with FreeZeroes disabled.
// NOTE(review): the signature line(s) are not visible in this listing; other
// code in this file calls getIntImmCost(Imm, Ty, CostKind) - confirm.
131 return getIntImmCostImpl(getDataLayout(), getST(), Imm, Ty, CostKind, false);
132}
133
134// Look for patterns of shift followed by AND that can be turned into a pair of
135// shifts. We won't need to materialize an immediate for the AND so these can
136// be considered free.
137static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
138 uint64_t Mask = Imm.getZExtValue();
139 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
140 if (!BO || !BO->hasOneUse())
141 return false;
142
143 if (BO->getOpcode() != Instruction::Shl)
144 return false;
145
146 if (!isa<ConstantInt>(BO->getOperand(1)))
147 return false;
148
149 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
150 // (and (shl x, c2), c1) will be matched to (srli (slli x, c2+c3), c3) if c1
151 // is a mask shifted by c2 bits with c3 leading zeros.
152 if (isShiftedMask_64(Mask)) {
153 unsigned Trailing = llvm::countr_zero(Mask);
154 if (ShAmt == Trailing)
155 return true;
156 }
157
158 return false;
159}
160
// Cost of using the integer immediate Imm as operand Idx of an instruction
// with the given Opcode. Returns TTI::TCC_Free when the immediate can be
// encoded or folded by the instruction (so hoisting it brings no benefit) and
// the materialisation cost otherwise.
// NOTE(review): the signature lines carrying the function name and the
// Opcode, Idx and CostKind parameters are not visible in this listing -
// confirm against the full source.
162 const APInt &Imm, Type *Ty,
164 Instruction *Inst) {
165 assert(Ty->isIntegerTy() &&
166 "getIntImmCost can only estimate cost of materialising integers");
167
168 // We have a Zero register, so 0 is always free.
169 if (Imm == 0)
170 return TTI::TCC_Free;
171
172 // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
173 // commutative, in others the immediate comes from a specific argument index.
174 bool Takes12BitImm = false;
175 unsigned ImmArgIdx = ~0U;
176
177 switch (Opcode) {
178 case Instruction::GetElementPtr:
179 // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
180 // split up large offsets in GEP into better parts than ConstantHoisting
181 // can.
182 return TTI::TCC_Free;
183 case Instruction::Store: {
184 // Use the materialization cost regardless of if it's the address or the
185 // value that is constant, except for if the store is misaligned and
186 // misaligned accesses are not legal (experience shows constant hoisting
187 // can sometimes be harmful in such cases).
188 if (Idx == 1 || !Inst)
189 return getIntImmCostImpl(getDataLayout(), getST(), Imm, Ty, CostKind,
190 /*FreeZeroes=*/true);
191
192 StoreInst *ST = cast<StoreInst>(Inst);
193 if (!getTLI()->allowsMemoryAccessForAlignment(
194 Ty->getContext(), DL, getTLI()->getValueType(DL, Ty),
195 ST->getPointerAddressSpace(), ST->getAlign()))
196 return TTI::TCC_Free;
197
198 return getIntImmCostImpl(getDataLayout(), getST(), Imm, Ty, CostKind,
199 /*FreeZeroes=*/true);
200 }
201 case Instruction::Load:
202 // If the address is a constant, use the materialization cost.
203 return getIntImmCost(Imm, Ty, CostKind);
204 case Instruction::And:
205 // zext.h
206 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
207 return TTI::TCC_Free;
208 // zext.w
209 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
210 return TTI::TCC_Free;
211 // bclri
212 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
213 return TTI::TCC_Free;
// The bit-width guard keeps canUseShiftPair's 64-bit mask extraction within
// range of the immediate.
214 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
215 canUseShiftPair(Inst, Imm))
216 return TTI::TCC_Free;
217 Takes12BitImm = true;
218 break;
219 case Instruction::Add:
220 Takes12BitImm = true;
221 break;
222 case Instruction::Or:
223 case Instruction::Xor:
224 // bseti/binvi
225 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
226 return TTI::TCC_Free;
227 Takes12BitImm = true;
228 break;
229 case Instruction::Mul:
230 // Power of 2 is a shift. Negated power of 2 is a shift and a negate.
231 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
232 return TTI::TCC_Free;
233 // One more or less than a power of 2 can use SLLI+ADD/SUB.
234 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
235 return TTI::TCC_Free;
236 // FIXME: There is no MULI instruction.
237 Takes12BitImm = true;
238 break;
239 case Instruction::Sub:
240 case Instruction::Shl:
241 case Instruction::LShr:
242 case Instruction::AShr:
// Non-commutative: the immediate is only foldable as operand 1.
243 Takes12BitImm = true;
244 ImmArgIdx = 1;
245 break;
246 default:
247 break;
248 }
249
250 if (Takes12BitImm) {
251 // Check immediate is the correct argument...
252 if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
253 // ... and fits into the 12-bit immediate.
254 if (Imm.getSignificantBits() <= 64 &&
255 getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
256 return TTI::TCC_Free;
257 }
258 }
259
260 // Otherwise, use the full materialisation cost.
261 return getIntImmCost(Imm, Ty, CostKind);
262 }
263
264 // By default, prevent hoisting.
265 return TTI::TCC_Free;
266}
267
// Always reports the immediate as free so that constants used here are never
// hoisted.
// NOTE(review): the signature lines naming this function and its remaining
// parameters are not visible in this listing - confirm.
270 const APInt &Imm, Type *Ty,
272 // Prevent hoisting in unknown cases.
273 return TTI::TCC_Free;
274}
275
276bool RISCVTTIImpl::hasActiveVectorLength(unsigned, Type *DataTy, Align) const {
277 return ST->hasVInstructions();
278}
279
// Population-count support query keyed on Zbb, or on the XCVbitmanip vendor
// extension when not targeting 64-bit.
// NOTE(review): the signature and the tail of the return expression (the
// values selected by this condition) are not visible in this listing -
// confirm against the full source.
282 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
283 return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())
286}
287
// Returns true only for the multiplicative reductions (vector_reduce_mul and
// vector_reduce_fmul); every other intrinsic ID yields false.
// NOTE(review): the signature line is not visible in this listing; II is
// presumably the IntrinsicInst being queried - confirm.
289 // Currently, the ExpandReductions pass can't expand scalable-vector
290 // reductions, but we still request expansion as RVV doesn't support certain
291 // reductions and the SelectionDAG can't legalize them either.
292 switch (II->getIntrinsicID()) {
293 default:
294 return false;
295 // These reductions have no equivalent in RVV
296 case Intrinsic::vector_reduce_mul:
297 case Intrinsic::vector_reduce_fmul:
298 return true;
299 }
300}
301
// Maximum vscale for the subtarget; defers to the base implementation when
// the subtarget-specific answer does not apply.
// NOTE(review): the statement guarded by the hasVInstructions() check is not
// visible in this listing - confirm what is returned on that path.
302std::optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
303 if (ST->hasVInstructions())
305 return BaseT::getMaxVScale();
306}
307
// vscale estimate used for cost tuning: when the subtarget has vector
// instructions and its real minimum VLEN is at least one RVV block, this is
// the number of RVVBitsPerBlock-sized blocks in that minimum VLEN.
// NOTE(review): the fallback return for the remaining cases is not visible in
// this listing - confirm.
308std::optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
309 if (ST->hasVInstructions())
310 if (unsigned MinVLen = ST->getRealMinVLen();
311 MinVLen >= RISCV::RVVBitsPerBlock)
312 return MinVLen / RISCV::RVVBitsPerBlock;
314}
315
// Register width query per register kind K. The LMUL used for vector kinds is
// RVVRegisterWidthLMUL clamped to [1, 8] and rounded down to a power of two.
// One kind reports XLEN; another reports LMUL * real-min-VLEN when RVV is
// used for fixed-length vectors and 0 otherwise.
// NOTE(review): the signature, the case labels and part of the last return
// expression are not visible in this listing - confirm which register kind
// each return belongs to.
318 unsigned LMUL =
319 llvm::bit_floor(std::clamp<unsigned>(RVVRegisterWidthLMUL, 1, 8));
320 switch (K) {
322 return TypeSize::getFixed(ST->getXLen());
324 return TypeSize::getFixed(
325 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
328 (ST->hasVInstructions() &&
331 : 0);
332 }
333
334 llvm_unreachable("Unsupported register kind");
335}
336
// Cost of loading a constant of type Ty from the constant pool: a fixed 2 for
// forming the address plus the cost of an ABI-aligned load of Ty from address
// space 0.
// NOTE(review): the line carrying the return type is not visible in this
// listing; callers in this file store the result in an InstructionCost.
338RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
339 // Add a cost of address generation + the cost of the load. The address
340 // is expected to be a PC relative offset to a constant pool entry
341 // using auipc/addi.
342 return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
343 /*AddressSpace=*/0, CostKind);
344}
345
346static bool isRepeatedConcatMask(ArrayRef<int> Mask, int &SubVectorSize) {
347 unsigned Size = Mask.size();
348 if (!isPowerOf2_32(Size))
349 return false;
350 for (unsigned I = 0; I != Size; ++I) {
351 if (static_cast<unsigned>(Mask[I]) == I)
352 continue;
353 if (Mask[I] != 0)
354 return false;
355 if (Size % I != 0)
356 return false;
357 for (unsigned J = I + 1; J != Size; ++J)
358 // Check the pattern is repeated.
359 if (static_cast<unsigned>(Mask[J]) != J % I)
360 return false;
361 SubVectorSize = I;
362 return true;
363 }
364 // That means Mask is <0, 1, 2, 3>. This is not a concatenation.
365 return false;
366}
367
// Builds the IR vector type used for vrgather indices matching DataVT: the
// integer form of DataVT, with the element type narrowed to i16 when the
// element would otherwise be wider than XLEN.
// NOTE(review): the first signature line (name, return type, leading
// parameters) is not visible in this listing; callers in this file invoke
// getVRGatherIndexType(VT, *ST, Context) - confirm.
369 LLVMContext &C) {
370 assert((DataVT.getScalarSizeInBits() != 8 ||
371 DataVT.getVectorNumElements() <= 256) && "unhandled case in lowering");
372 MVT IndexVT = DataVT.changeTypeToInteger();
373 if (IndexVT.getScalarType().bitsGT(ST.getXLenVT()))
374 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
375 return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
376}
377
// Cost model for vector shuffles. The shuffle kind is first refined from the
// mask, then the type is legalized. Fixed-length vectors get a set of more
// precise special cases (interleave/deinterleave, repeated concatenation,
// single- and two-source vrgather, multi-register splits); after that a
// second switch models the remaining kinds via slides, merges, broadcasts and
// gathers. Anything not handled falls back to BaseT::getShuffleCost.
// NOTE(review): many lines of this definition are not visible in this listing
// (the signature start, several case labels, and some declarations such as
// Cost, MaskTy and parts of call expressions) - confirm against the full
// source before changing any of the logic below.
379 VectorType *Tp, ArrayRef<int> Mask,
381 int Index, VectorType *SubTp,
383 const Instruction *CxtI) {
384 Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
385
386 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
387
388 // First, handle cases where having a fixed length vector enables us to
389 // give a more accurate cost than falling back to generic scalable codegen.
390 // TODO: Each of these cases hints at a modeling gap around scalable vectors.
391 if (isa<FixedVectorType>(Tp)) {
392 switch (Kind) {
393 default:
394 break;
396 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
397 MVT EltTp = LT.second.getVectorElementType();
398 // If the size of the element is < ELEN then shuffles of interleaves and
399 // deinterleaves of 2 vectors can be lowered into the following
400 // sequences
401 if (EltTp.getScalarSizeInBits() < ST->getELen()) {
402 // Example sequence:
403 // vsetivli zero, 4, e8, mf4, ta, ma (ignored)
404 // vwaddu.vv v10, v8, v9
405 // li a0, -1 (ignored)
406 // vwmaccu.vx v10, a0, v9
407 if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
408 return 2 * LT.first * TLI->getLMULCost(LT.second);
409
410 if (Mask[0] == 0 || Mask[0] == 1) {
411 auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
412 // Example sequence:
413 // vnsrl.wi v10, v8, 0
414 if (equal(DeinterleaveMask, Mask))
415 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
416 LT.second, CostKind);
417 }
418 }
419 int SubVectorSize;
420 if (LT.second.getScalarSizeInBits() != 1 &&
421 isRepeatedConcatMask(Mask, SubVectorSize)) {
// A repeated-concat mask is modelled as log2(Mask.size() / SubVectorSize)
// doubling steps, each step inserting at index SubVectorSize * 2^I.
423 unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
424 // The cost of extraction from a subvector is 0 if the index is 0.
425 for (unsigned I = 0; I != NumSlides; ++I) {
426 unsigned InsertIndex = SubVectorSize * (1 << I);
427 FixedVectorType *SubTp =
428 FixedVectorType::get(Tp->getElementType(), InsertIndex);
429 FixedVectorType *DestTp =
431 std::pair<InstructionCost, MVT> DestLT =
433 // Add the cost of whole vector register move because the
434 // destination vector register group for vslideup cannot overlap the
435 // source.
436 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
438 CostKind, InsertIndex, SubTp);
439 }
440 return Cost;
441 }
442 }
443 // vrgather + cost of generating the mask constant.
444 // We model this for an unknown mask with a single vrgather.
445 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
446 (LT.second.getScalarSizeInBits() != 8 ||
447 LT.second.getVectorNumElements() <= 256)) {
448 VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
449 InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
450 return IndexCost +
451 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
452 }
453 [[fallthrough]];
454 }
457 // 2 x (vrgather + cost of generating the mask constant) + cost of mask
458 // register for the second vrgather. We model this for an unknown
459 // (shuffle) mask.
460 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
461 (LT.second.getScalarSizeInBits() != 8 ||
462 LT.second.getVectorNumElements() <= 256)) {
463 auto &C = Tp->getContext();
464 auto EC = Tp->getElementCount();
465 VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
467 InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
468 InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
469 return 2 * IndexCost +
470 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
471 LT.second, CostKind) +
472 MaskCost;
473 }
474 [[fallthrough]];
475 }
476 case TTI::SK_Select: {
477 // We are going to permute multiple sources and the result will be in
478 // multiple destinations. Providing an accurate cost only for splits where
479 // the element type remains the same.
480 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
481 LT.second.isFixedLengthVector() &&
482 LT.second.getVectorElementType().getSizeInBits() ==
484 LT.second.getVectorNumElements() <
485 cast<FixedVectorType>(Tp)->getNumElements() &&
486 divideCeil(Mask.size(),
487 cast<FixedVectorType>(Tp)->getNumElements()) ==
488 static_cast<unsigned>(*LT.first.getValue())) {
489 unsigned NumRegs = *LT.first.getValue();
490 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
491 unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
492 auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
493
// Cost each SubVF-wide slice of the mask separately; slices that are an
// identity (or entirely poison) are skipped as free.
495 for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
496 I < NumSrcRegs; ++I) {
497 bool IsSingleVector = true;
498 SmallVector<int> SubMask(SubVF, PoisonMaskElem);
// NOTE(review): for the last slice the length is Mask.size() % SubVF, which
// is 0 when Mask.size() is an exact multiple of SubVF; that slice would then
// stay all-poison and be skipped - confirm this is intended.
499 transform(
500 Mask.slice(I * SubVF,
501 I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
502 SubMask.begin(), [&](int I) -> int {
503 if (I == PoisonMaskElem)
504 return PoisonMaskElem;
505 bool SingleSubVector = I / VF == 0;
506 IsSingleVector &= SingleSubVector;
507 return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
508 });
509 if (all_of(enumerate(SubMask), [](auto &&P) {
510 return P.value() == PoisonMaskElem ||
511 static_cast<unsigned>(P.value()) == P.index();
512 }))
513 continue;
516 SubVecTy, SubMask, CostKind, 0, nullptr);
517 }
518 return Cost;
519 }
520 break;
521 }
522 }
523 };
524
525 // Handle scalable vectors (and fixed vectors legalized to scalable vectors).
526 switch (Kind) {
527 default:
528 // Fallthrough to generic handling.
529 // TODO: Most of these cases will return getInvalid in generic code, and
530 // must be implemented here.
531 break;
533 // Extract at zero is always a subregister extract
534 if (Index == 0)
535 return TTI::TCC_Free;
536
537 // If we're extracting a subvector of at most m1 size at a sub-register
538 // boundary - which unfortunately we need exact vlen to identify - this is
539 // a subregister extract at worst and thus won't require a vslidedown.
540 // TODO: Extend for aligned m2, m4 subvector extracts
541 // TODO: Extend for misaligned (but contained) extracts
542 // TODO: Extend for scalable subvector types
543 if (std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
544 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
545 const unsigned MinVLen = ST->getRealMinVLen();
546 const unsigned MaxVLen = ST->getRealMaxVLen();
547 if (MinVLen == MaxVLen &&
548 SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
549 SubLT.second.getSizeInBits() <= MinVLen)
550 return TTI::TCC_Free;
551 }
552
553 // Example sequence:
554 // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
555 // vslidedown.vi v8, v9, 2
556 return LT.first *
557 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
559 // Example sequence:
560 // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
561 // vslideup.vi v8, v9, 2
562 return LT.first *
563 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
564 case TTI::SK_Select: {
565 // Example sequence:
566 // li a0, 90
567 // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
568 // vmv.s.x v0, a0
569 // vmerge.vvm v8, v9, v8, v0
570 // We use 2 for the cost of the mask materialization as this is the true
571 // cost for small masks and most shuffles are small. At worst, this cost
572 // should be a very small constant for the constant pool load. As such,
573 // we may bias towards large selects slightly more than truly warranted.
574 return LT.first *
575 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
576 LT.second, CostKind));
577 }
578 case TTI::SK_Broadcast: {
// HasScalar: the first argument is itself an insertelement, i.e. the value
// being broadcast is treated as available as a scalar.
579 bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
580 Instruction::InsertElement);
581 if (LT.second.getScalarSizeInBits() == 1) {
582 if (HasScalar) {
583 // Example sequence:
584 // andi a0, a0, 1
585 // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
586 // vmv.v.x v8, a0
587 // vmsne.vi v0, v8, 0
588 return LT.first *
589 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
590 LT.second, CostKind));
591 }
592 // Example sequence:
593 // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
594 // vmv.v.i v8, 0
595 // vmerge.vim v8, v8, 1, v0
596 // vmv.x.s a0, v8
597 // andi a0, a0, 1
598 // vmv.v.x v8, a0
599 // vmsne.vi v0, v8, 0
600
601 return LT.first *
602 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
603 RISCV::VMV_X_S, RISCV::VMV_V_X,
604 RISCV::VMSNE_VI},
605 LT.second, CostKind));
606 }
607
608 if (HasScalar) {
609 // Example sequence:
610 // vmv.v.x v8, a0
611 return LT.first *
612 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
613 }
614
615 // Example sequence:
616 // vrgather.vi v9, v8, 0
617 return LT.first *
618 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
619 }
620 case TTI::SK_Splice: {
621 // vslidedown+vslideup.
622 // TODO: Multiplying by LT.first implies this legalizes into multiple copies
623 // of similar code, but I think we expand through memory.
// Start from the register-operand forms and switch one slide to its immediate
// form when Index lies in the open range (-32, 32).
624 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
625 if (Index >= 0 && Index < 32)
626 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
627 else if (Index < 0 && Index > -32)
628 Opcodes[1] = RISCV::VSLIDEUP_VI;
629 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
630 }
631 case TTI::SK_Reverse: {
632 // TODO: Cases to improve here:
633 // * Illegal vector types
634 // * i64 on RV32
635 // * i1 vector
636 // At low LMUL, most of the cost is producing the vrgather index register.
637 // At high LMUL, the cost of the vrgather itself will dominate.
638 // Example sequence:
639 // csrr a0, vlenb
640 // srli a0, a0, 3
641 // addi a0, a0, -1
642 // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
643 // vid.v v9
644 // vrsub.vx v10, v9, a0
645 // vrgather.vv v9, v8, v10
646 InstructionCost LenCost = 3;
647 if (LT.second.isFixedLengthVector())
648 // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
649 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
650 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
651 if (LT.second.isFixedLengthVector() &&
652 isInt<5>(LT.second.getVectorNumElements() - 1))
653 Opcodes[1] = RISCV::VRSUB_VI;
654 InstructionCost GatherCost =
655 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
656 // Mask operation additionally required extend and truncate
657 InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
658 return LT.first * (LenCost + GatherCost + ExtendCost);
659 }
660 }
661 return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
662}
663
// Tests whether the LMUL associated with VT is one of a small set of values;
// the visible part of the comparison covers the fractional LMULs F8 and F4.
// NOTE(review): the line defining LMUL and the rest of the comparison are not
// visible in this listing. Also note the predicate is declared to return
// unsigned although it yields a boolean expression - confirm whether bool was
// intended.
664static unsigned isM1OrSmaller(MVT VT) {
666 return (LMUL == RISCVII::VLMUL::LMUL_F8 || LMUL == RISCVII::VLMUL::LMUL_F4 ||
668}
669
// Cost of inserting and/or extracting the demanded elements of Ty. For an
// insert-only query on a legal vector type, i1 vectors are costed as the same
// operation on an i8 vector plus a truncate, and types whose container is M1
// or smaller are capped at one vslide1down.vx per element.
// NOTE(review): the signature start, the statement guarded by the scalable
// vector check, and the start of the expression initialising Cost are not
// visible in this listing - confirm against the full source.
671 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
673 if (isa<ScalableVectorType>(Ty))
675
676 // A build_vector (which is m1 sized or smaller) can be done in no
677 // worse than one vslide1down.vx per element in the type. We could
678 // in theory do an explode_vector in the inverse manner, but our
679 // lowering today does not have a first class node for this pattern.
681 Ty, DemandedElts, Insert, Extract, CostKind);
682 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
683 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
684 if (Ty->getScalarSizeInBits() == 1) {
685 auto *WideVecTy = cast<VectorType>(Ty->getWithNewBitWidth(8));
686 // Note: Implicit scalar anyextend is assumed to be free since the i1
687 // must be stored in a GPR.
688 return getScalarizationOverhead(WideVecTy, DemandedElts, Insert, Extract,
689 CostKind) +
690 getCastInstrCost(Instruction::Trunc, Ty, WideVecTy,
692 }
693
694 assert(LT.second.isFixedLengthVector());
695 MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
696 if (isM1OrSmaller(ContainerVT)) {
// Take the cheaper of the previously computed cost and the slide-based
// build_vector estimate.
697 InstructionCost BV =
698 cast<FixedVectorType>(Ty)->getNumElements() *
699 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
700 if (BV < Cost)
701 Cost = BV;
702 }
703 }
704 return Cost;
705}
706
// Cost of a masked load/store: when the masked access is not legal for Src
// (or another, not visible, condition holds) the base implementation is used;
// otherwise it is costed like the corresponding unmasked memory operation.
// NOTE(review): the return type, the final parameter and the second half of
// the guarding condition are not visible in this listing - confirm.
708RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
709 unsigned AddressSpace,
711 if (!isLegalMaskedLoadStore(Src, Alignment) ||
713 return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
714 CostKind);
715
716 return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
717}
718
// Cost of an interleaved load/store group of the given Factor. Unmasked
// groups that can be lowered to segment loads/stores are costed either as one
// wide memory op plus Factor LMUL-sized shuffles (when the subtarget
// optimizes segment accesses) or as one scalar memory op per estimated
// element. Otherwise fixed-length vectors are costed as a wide memory op plus
// explicit shuffles: one stride shuffle per requested index for loads, one
// interleave shuffle for factor-2 stores; other store factors defer to the
// base implementation.
// NOTE(review): the first signature line, the statement guarded by the
// scalable-vector check, the declaration of Cost in the optimized-segment
// path and the heads of the getShuffleCost calls are not visible in this
// listing - confirm against the full source.
720 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
721 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
722 bool UseMaskForCond, bool UseMaskForGaps) {
723
724 // The interleaved memory access pass will lower interleaved memory ops (i.e.
725 // a load and store followed by a specific shuffle) to vlseg/vsseg
726 // intrinsics.
727 if (!UseMaskForCond && !UseMaskForGaps &&
728 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
729 auto *VTy = cast<VectorType>(VecTy);
730 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
731 // Need to make sure type hasn't been scalarized
732 if (LT.second.isVector()) {
733 auto *SubVecTy =
734 VectorType::get(VTy->getElementType(),
735 VTy->getElementCount().divideCoefficientBy(Factor));
736 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
737 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
738 AddressSpace, DL)) {
739
740 // Some processors optimize segment loads/stores as one wide memory op +
741 // Factor * LMUL shuffle ops.
742 if (ST->hasOptimizedSegmentLoadStore(Factor)) {
744 getMemoryOpCost(Opcode, VTy, Alignment, AddressSpace, CostKind);
745 MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
746 Cost += Factor * TLI->getLMULCost(SubVecVT);
747 return LT.first * Cost;
748 }
749
750 // Otherwise, the cost is proportional to the number of elements (VL *
751 // Factor ops).
752 InstructionCost MemOpCost =
753 getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0,
754 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
755 unsigned NumLoads = getEstimatedVLFor(VTy);
756 return NumLoads * MemOpCost;
757 }
758 }
759 }
760
761 // TODO: Return the cost of interleaved accesses for scalable vector when
762 // unable to convert to segment accesses instructions.
763 if (isa<ScalableVectorType>(VecTy))
765
766 auto *FVTy = cast<FixedVectorType>(VecTy);
767 InstructionCost MemCost =
768 getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
// VF is the number of elements per member of the interleave group.
769 unsigned VF = FVTy->getNumElements() / Factor;
770
771 // An interleaved load will look like this for Factor=3:
772 // %wide.vec = load <12 x i32>, ptr %3, align 4
773 // %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
774 // %strided.vec1 = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
775 // %strided.vec2 = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
776 if (Opcode == Instruction::Load) {
777 InstructionCost Cost = MemCost;
778 for (unsigned Index : Indices) {
779 FixedVectorType *SubVecTy =
780 FixedVectorType::get(FVTy->getElementType(), VF * Factor);
781 auto Mask = createStrideMask(Index, Factor, VF);
782 InstructionCost ShuffleCost =
784 CostKind, 0, nullptr, {});
785 Cost += ShuffleCost;
786 }
787 return Cost;
788 }
789
790 // TODO: Model for NF > 2
791 // We'll need to enhance getShuffleCost to model shuffles that are just
792 // inserts and extracts into subvectors, since they won't have the full cost
793 // of a vrgather.
794 // An interleaved store for 3 vectors of 4 lanes will look like
795 // %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <8 x i32> <0...7>
796 // %12 = shufflevector <4 x i32> %9, <4 x i32> poison, <8 x i32> <0...3>
797 // %13 = shufflevector <8 x i32> %11, <8 x i32> %12, <12 x i32> <0...11>
798 // %interleaved.vec = shufflevector %13, poison, <12 x i32> <interleave mask>
799 // store <12 x i32> %interleaved.vec, ptr %10, align 4
800 if (Factor != 2)
801 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
802 Alignment, AddressSpace, CostKind,
803 UseMaskForCond, UseMaskForGaps);
804
805 assert(Opcode == Instruction::Store && "Opcode must be a store");
806 // For an interleaving store of 2 vectors, we perform one large interleaving
807 // shuffle that goes into the wide store
808 auto Mask = createInterleaveMask(VF, Factor);
809 InstructionCost ShuffleCost =
811 CostKind, 0, nullptr, {});
812 return MemCost + ShuffleCost;
813}
814
// Cost of a gather (load) or scatter (store) of DataTy. Falls back to the
// base implementation when the operation is not a legal masked
// gather/scatter; otherwise charges one scalar element memory op per
// estimated vector element.
// NOTE(review): the first signature line and the condition guarding the first
// BaseT fallback are not visible in this listing - confirm.
816 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
817 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
819 return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
820 Alignment, CostKind, I);
821
822 if ((Opcode == Instruction::Load &&
823 !isLegalMaskedGather(DataTy, Align(Alignment))) ||
824 (Opcode == Instruction::Store &&
825 !isLegalMaskedScatter(DataTy, Align(Alignment))))
826 return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
827 Alignment, CostKind, I);
828
829 // Cost is proportional to the number of memory operations implied. For
830 // scalable vectors, we use an estimate on that number since we don't
831 // know exactly what VL will be.
832 auto &VTy = *cast<VectorType>(DataTy);
833 InstructionCost MemOpCost =
834 getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
835 {TTI::OK_AnyValue, TTI::OP_None}, I);
836 unsigned NumLoads = getEstimatedVLFor(&VTy);
837 return NumLoads * MemOpCost;
838}
839
// Cost of a strided load/store of DataTy. Opcodes other than Load/Store, and
// loads/stores that are not legal strided accesses, defer to the base
// implementation; otherwise the cost is one scalar element memory op per
// estimated vector element (with an early TCC_Basic return on a path whose
// condition is not visible here).
// NOTE(review): the first signature line and the condition guarding the
// TCC_Basic return are not visible in this listing - confirm.
841 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
842 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
843 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
844 !isLegalStridedLoadStore(DataTy, Alignment)) ||
845 (Opcode != Instruction::Load && Opcode != Instruction::Store))
846 return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
847 Alignment, CostKind, I);
848
850 return TTI::TCC_Basic;
851
852 // Cost is proportional to the number of memory operations implied. For
853 // scalable vectors, we use an estimate on that number since we don't
854 // know exactly what VL will be.
855 auto &VTy = *cast<VectorType>(DataTy);
856 InstructionCost MemOpCost =
857 getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
858 {TTI::OK_AnyValue, TTI::OP_None}, I);
859 unsigned NumLoads = getEstimatedVLFor(&VTy);
860 return NumLoads * MemOpCost;
861}
862
// Sums, over every vector type in Tys, the cost of one store plus one load of
// that type (address space 0); non-vector types contribute nothing.
// NOTE(review): the signature and the declarations of CostKind, Cost and the
// alignment A are not visible in this listing - confirm against the full
// source.
865 // FIXME: This is a property of the default vector convention, not
866 // all possible calling conventions. Fixing that will require
867 // some TTI API and SLP rework.
870 for (auto *Ty : Tys) {
871 if (!Ty->isVectorTy())
872 continue;
874 Cost += getMemoryOpCost(Instruction::Store, Ty, A, 0, CostKind) +
875 getMemoryOpCost(Instruction::Load, Ty, A, 0, CostKind);
876 }
877 return Cost;
878}
879
// Table of {intrinsic ID, element type, cost} entries.
// NOTE(review): the line declaring the table (its name and entry type) is not
// visible in this listing - confirm against the full source.
880// Currently, these represent both throughput and codesize costs
881// for the respective intrinsics. The costs in this table are simply
882// instruction counts with the following adjustments made:
883// * One vsetvli is considered free.
885 {Intrinsic::floor, MVT::f32, 9},
886 {Intrinsic::floor, MVT::f64, 9},
887 {Intrinsic::ceil, MVT::f32, 9},
888 {Intrinsic::ceil, MVT::f64, 9},
889 {Intrinsic::trunc, MVT::f32, 7},
890 {Intrinsic::trunc, MVT::f64, 7},
891 {Intrinsic::round, MVT::f32, 9},
892 {Intrinsic::round, MVT::f64, 9},
893 {Intrinsic::roundeven, MVT::f32, 9},
894 {Intrinsic::roundeven, MVT::f64, 9},
895 {Intrinsic::rint, MVT::f32, 7},
896 {Intrinsic::rint, MVT::f64, 7},
897 {Intrinsic::lrint, MVT::i32, 1},
898 {Intrinsic::lrint, MVT::i64, 1},
899 {Intrinsic::llrint, MVT::i64, 1},
900 {Intrinsic::nearbyint, MVT::f32, 9},
901 {Intrinsic::nearbyint, MVT::f64, 9},
902 {Intrinsic::bswap, MVT::i16, 3},
903 {Intrinsic::bswap, MVT::i32, 12},
904 {Intrinsic::bswap, MVT::i64, 31},
905 {Intrinsic::vp_bswap, MVT::i16, 3},
906 {Intrinsic::vp_bswap, MVT::i32, 12},
907 {Intrinsic::vp_bswap, MVT::i64, 31},
908 {Intrinsic::vp_fshl, MVT::i8, 7},
909 {Intrinsic::vp_fshl, MVT::i16, 7},
910 {Intrinsic::vp_fshl, MVT::i32, 7},
911 {Intrinsic::vp_fshl, MVT::i64, 7},
912 {Intrinsic::vp_fshr, MVT::i8, 7},
913 {Intrinsic::vp_fshr, MVT::i16, 7},
914 {Intrinsic::vp_fshr, MVT::i32, 7},
915 {Intrinsic::vp_fshr, MVT::i64, 7},
916 {Intrinsic::bitreverse, MVT::i8, 17},
917 {Intrinsic::bitreverse, MVT::i16, 24},
918 {Intrinsic::bitreverse, MVT::i32, 33},
919 {Intrinsic::bitreverse, MVT::i64, 52},
920 {Intrinsic::vp_bitreverse, MVT::i8, 17},
921 {Intrinsic::vp_bitreverse, MVT::i16, 24},
922 {Intrinsic::vp_bitreverse, MVT::i32, 33},
923 {Intrinsic::vp_bitreverse, MVT::i64, 52},
924 {Intrinsic::ctpop, MVT::i8, 12},
925 {Intrinsic::ctpop, MVT::i16, 19},
926 {Intrinsic::ctpop, MVT::i32, 20},
927 {Intrinsic::ctpop, MVT::i64, 21},
928 {Intrinsic::ctlz, MVT::i8, 19},
929 {Intrinsic::ctlz, MVT::i16, 28},
930 {Intrinsic::ctlz, MVT::i32, 31},
931 {Intrinsic::ctlz, MVT::i64, 35},
932 {Intrinsic::cttz, MVT::i8, 16},
933 {Intrinsic::cttz, MVT::i16, 23},
934 {Intrinsic::cttz, MVT::i32, 24},
935 {Intrinsic::cttz, MVT::i64, 25},
936 {Intrinsic::vp_ctpop, MVT::i8, 12},
937 {Intrinsic::vp_ctpop, MVT::i16, 19},
938 {Intrinsic::vp_ctpop, MVT::i32, 20},
939 {Intrinsic::vp_ctpop, MVT::i64, 21},
940 {Intrinsic::vp_ctlz, MVT::i8, 19},
941 {Intrinsic::vp_ctlz, MVT::i16, 28},
942 {Intrinsic::vp_ctlz, MVT::i32, 31},
943 {Intrinsic::vp_ctlz, MVT::i64, 35},
944 {Intrinsic::vp_cttz, MVT::i8, 16},
945 {Intrinsic::vp_cttz, MVT::i16, 23},
946 {Intrinsic::vp_cttz, MVT::i32, 24},
947 {Intrinsic::vp_cttz, MVT::i64, 25},
948};
949
// Translates a VP intrinsic ID into an ISD opcode. The case labels are
// generated by expanding HELPER_MAP_VPID_TO_VPSD over the entries of
// llvm/IR/VPIntrinsics.def; an ID with no generated case falls out of the
// switch and yields ISD::DELETED_NODE.
// NOTE(review): the signature (listing line 950) is not present in this
// listing.
951 switch (ID) {
952#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
953 case Intrinsic::VPID: \
954 return ISD::VPSD;
955#include "llvm/IR/VPIntrinsics.def"
956#undef HELPER_MAP_VPID_TO_VPSD
957 }
958 return ISD::DELETED_NODE;
959}
960
// Intrinsic cost query driven by ICA (intrinsic ID, return type, argument
// types). Each case below either returns a cost computed from the legalized
// type, or breaks out of the switch to the VectorIntrinsicCostTable lookup at
// the bottom.
// NOTE(review): the signature (listing lines 961-963) and the final fallback
// statement (listing line 1204) are not present in this listing; presumably
// this is RISCVTTIImpl::getIntrinsicInstrCost -- confirm against the full file.
964 auto *RetTy = ICA.getReturnType();
965 switch (ICA.getID()) {
966 case Intrinsic::lrint:
967 case Intrinsic::llrint:
968 // We can't currently lower half or bfloat vector lrint/llrint.
// NOTE(review): the statement guarded by this `if` (listing line 971) is
// missing from this listing.
969 if (auto *VecTy = dyn_cast<VectorType>(ICA.getArgTypes()[0]);
970 VecTy && VecTy->getElementType()->is16bitFPTy())
972 [[fallthrough]];
973 case Intrinsic::ceil:
974 case Intrinsic::floor:
975 case Intrinsic::trunc:
976 case Intrinsic::rint:
977 case Intrinsic::round:
978 case Intrinsic::roundeven: {
979 // These all use the same code.
// NOTE(review): `LT` is presumably declared on listing line 980, which is
// missing from this listing.
981 if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
982 return LT.first * 8;
983 break;
984 }
985 case Intrinsic::umin:
986 case Intrinsic::umax:
987 case Intrinsic::smin:
988 case Intrinsic::smax: {
// NOTE(review): `LT` is presumably declared on listing line 989, which is
// missing from this listing.
// Scalar integer min/max costs one unit per legalized part when Zbb is
// available.
990 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
991 return LT.first;
992
993 if (ST->hasVInstructions() && LT.second.isVector()) {
994 unsigned Op;
995 switch (ICA.getID()) {
996 case Intrinsic::umin:
997 Op = RISCV::VMINU_VV;
998 break;
999 case Intrinsic::umax:
1000 Op = RISCV::VMAXU_VV;
1001 break;
1002 case Intrinsic::smin:
1003 Op = RISCV::VMIN_VV;
1004 break;
1005 case Intrinsic::smax:
1006 Op = RISCV::VMAX_VV;
1007 break;
1008 }
1009 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1010 }
1011 break;
1012 }
1013 case Intrinsic::sadd_sat:
1014 case Intrinsic::ssub_sat:
1015 case Intrinsic::uadd_sat:
1016 case Intrinsic::usub_sat: {
1017 auto LT = getTypeLegalizationCost(RetTy);
1018 if (ST->hasVInstructions() && LT.second.isVector()) {
1019 unsigned Op;
1020 switch (ICA.getID()) {
1021 case Intrinsic::sadd_sat:
1022 Op = RISCV::VSADD_VV;
1023 break;
1024 case Intrinsic::ssub_sat:
// Signed saturating subtract is vssub.vv; vssubu.vv is the unsigned form and
// belongs to usub_sat below.
1025 Op = RISCV::VSSUB_VV;
1026 break;
1027 case Intrinsic::uadd_sat:
1028 Op = RISCV::VSADDU_VV;
1029 break;
1030 case Intrinsic::usub_sat:
1031 Op = RISCV::VSSUBU_VV;
1032 break;
1033 }
1034 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1035 }
1036 break;
1037 }
1038 case Intrinsic::fabs:
1039 case Intrinsic::sqrt: {
1040 auto LT = getTypeLegalizationCost(RetTy);
1041 // TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin
1042 if (ST->hasVInstructions() && LT.second.isVector()) {
1043 unsigned Op;
1044 switch (ICA.getID()) {
1045 case Intrinsic::fabs:
1046 Op = RISCV::VFSGNJX_VV;
1047 break;
1048 case Intrinsic::sqrt:
1049 Op = RISCV::VFSQRT_V;
1050 break;
1051 }
1052 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1053 }
1054 break;
1055 }
1056 case Intrinsic::cttz:
1057 case Intrinsic::ctlz:
1058 case Intrinsic::ctpop: {
1059 auto LT = getTypeLegalizationCost(RetTy);
// Single-instruction cost only applies with Zvbb; otherwise fall through to
// the table lookup after the switch.
1060 if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
1061 unsigned Op;
1062 switch (ICA.getID()) {
1063 case Intrinsic::cttz:
1064 Op = RISCV::VCTZ_V;
1065 break;
1066 case Intrinsic::ctlz:
1067 Op = RISCV::VCLZ_V;
1068 break;
1069 case Intrinsic::ctpop:
1070 Op = RISCV::VCPOP_V;
1071 break;
1072 }
1073 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1074 }
1075 break;
1076 }
1077 case Intrinsic::abs: {
1078 auto LT = getTypeLegalizationCost(RetTy);
1079 if (ST->hasVInstructions() && LT.second.isVector()) {
1080 // vrsub.vi v10, v8, 0
1081 // vmax.vv v8, v8, v10
1082 return LT.first *
1083 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1084 LT.second, CostKind);
1085 }
1086 break;
1087 }
1088 case Intrinsic::get_active_lane_mask: {
1089 if (ST->hasVInstructions()) {
// Cost is computed on a vector of the first argument's type with the return
// type's element count, not on the returned mask type itself.
1090 Type *ExpRetTy = VectorType::get(
1091 ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
1092 auto LT = getTypeLegalizationCost(ExpRetTy);
1093
1094 // vid.v v8 // considered hoisted
1095 // vsaddu.vx v8, v8, a0
1096 // vmsltu.vx v0, v8, a1
1097 return LT.first *
1098 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1099 LT.second, CostKind);
1100 }
1101 break;
1102 }
1103 // TODO: add more intrinsic
1104 case Intrinsic::stepvector: {
1105 auto LT = getTypeLegalizationCost(RetTy);
1106 // Legalisation of illegal types involves an `index' instruction plus
1107 // (LT.first - 1) vector adds.
1108 if (ST->hasVInstructions())
1109 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
1110 (LT.first - 1) *
1111 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
1112 return 1 + (LT.first - 1);
1113 }
1114 case Intrinsic::experimental_cttz_elts: {
1115 Type *ArgTy = ICA.getArgTypes()[0];
1116 EVT ArgType = TLI->getValueType(DL, ArgTy, true);
1117 if (getTLI()->shouldExpandCttzElements(ArgType))
1118 break;
1119 InstructionCost Cost = getRISCVInstructionCost(
1120 RISCV::VFIRST_M, getTypeLegalizationCost(ArgTy).second, CostKind);
1121
1122 // If zero_is_poison is false, then we will generate additional
1123 // cmp + select instructions to convert -1 to EVL.
1124 Type *BoolTy = Type::getInt1Ty(RetTy->getContext());
// NOTE(review): the trailing arguments of the two getCmpSelInstrCost calls
// (listing lines 1128 and 1130) are missing from this listing.
1125 if (ICA.getArgs().size() > 1 &&
1126 cast<ConstantInt>(ICA.getArgs()[1])->isZero())
1127 Cost += getCmpSelInstrCost(Instruction::ICmp, BoolTy, RetTy,
1129 getCmpSelInstrCost(Instruction::Select, RetTy, BoolTy,
1131
1132 return Cost;
1133 }
1134 case Intrinsic::vp_rint: {
1135 // RISC-V target uses at least 5 instructions to lower rounding intrinsics.
1136 unsigned Cost = 5;
1137 auto LT = getTypeLegalizationCost(RetTy);
1138 if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
1139 return Cost * LT.first;
1140 break;
1141 }
1142 case Intrinsic::vp_nearbyint: {
1143 // One more read and one more write of fflags than vp_rint.
1144 unsigned Cost = 7;
1145 auto LT = getTypeLegalizationCost(RetTy);
// NOTE(review): this queries ISD::VP_FRINT, the same opcode as the vp_rint
// case above -- confirm that is intended for vp_nearbyint.
1146 if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
1147 return Cost * LT.first;
1148 break;
1149 }
1150 case Intrinsic::vp_ceil:
1151 case Intrinsic::vp_floor:
1152 case Intrinsic::vp_round:
1153 case Intrinsic::vp_roundeven:
1154 case Intrinsic::vp_roundtozero: {
1155 // Rounding with static rounding mode needs two more instructions to
1156 // swap/write FRM than vp_rint.
1157 unsigned Cost = 7;
1158 auto LT = getTypeLegalizationCost(RetTy);
1159 unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
1160 if (TLI->isOperationCustom(VPISD, LT.second))
1161 return Cost * LT.first;
1162 break;
1163 }
1164 case Intrinsic::vp_fneg: {
// NOTE(review): the initializer of FOp (listing line 1166) is missing from
// this listing.
1165 std::optional<unsigned> FOp =
1167 assert(FOp.has_value());
1168 return getArithmeticInstrCost(*FOp, ICA.getReturnType(), CostKind);
1169 break;
1170 }
1171 case Intrinsic::vp_select: {
1172 Intrinsic::ID IID = ICA.getID();
1173 std::optional<unsigned> FOp = VPIntrinsic::getFunctionalOpcodeForVP(IID);
1174 assert(FOp.has_value());
// NOTE(review): the remaining arguments of this call (listing line 1176) are
// missing from this listing.
1175 return getCmpSelInstrCost(*FOp, ICA.getReturnType(), ICA.getArgTypes()[0],
1177 }
1178 case Intrinsic::vp_merge:
// NOTE(review): the middle arguments of this call (listing line 1180) are
// missing from this listing.
1179 return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
1181 CostKind);
1182 case Intrinsic::experimental_vp_splat: {
1183 auto LT = getTypeLegalizationCost(RetTy);
1184 // TODO: Lower i1 experimental_vp_splat
// NOTE(review): the statement guarded by this `if` (listing line 1186) is
// missing from this listing.
1185 if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
1187 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1188 ? RISCV::VFMV_V_F
1189 : RISCV::VMV_V_X,
1190 LT.second, CostKind);
1191 }
1192 }
1193
// Table-driven fallback: look up (intrinsic ID, legalized element type) in
// VectorIntrinsicCostTable and scale the entry by the number of legalized
// parts.
1194 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1195 if (auto LT = getTypeLegalizationCost(RetTy);
1196 LT.second.isVector()) {
1197 MVT EltTy = LT.second.getVectorElementType();
1198 if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
1199 ICA.getID(), EltTy))
1200 return LT.first * Entry->Cost;
1201 }
1202 }
1203
1205}
1206
// Cast cost query. Non-vector casts, targets without vector instructions, and
// element sizes above ELEN defer to BaseT. Extends from / truncates to i1 are
// costed up front; the remaining casts between legal vector types are costed
// from RVV extend / narrow / convert sequences.
// NOTE(review): part of the signature (listing lines 1207, 1209 and 1210) is
// not present in this listing.
1208 Type *Src,
1211 const Instruction *I) {
1212 bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
1213 if (!IsVectorType)
1214 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1215
1216 // FIXME: Need to compute legalizing cost for illegal types. The current
1217 // code handles only legal types and those which can be trivially
1218 // promoted to legal.
1219 if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
1220 Dst->getScalarSizeInBits() > ST->getELen())
1221 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1222
1223 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1224 assert(ISD && "Invalid opcode");
1225 std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
1226 std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
1227
1228 // Handle i1 source and dest cases *before* calling logic in BasicTTI.
1229 // The shared implementation doesn't model vector widening during legalization
1230 // and instead assumes scalarization. In order to scalarize an <N x i1>
1231 // vector, we need to extend/trunc to/from i8. If we don't special case
1232 // this, we can get an infinite recursion cycle.
1233 switch (ISD) {
1234 default:
1235 break;
1236 case ISD::SIGN_EXTEND:
1237 case ISD::ZERO_EXTEND:
1238 if (Src->getScalarSizeInBits() == 1) {
1239 // We do not use vsext/vzext to extend from mask vector.
1240 // Instead we use the following instructions to extend from mask vector:
1241 // vmv.v.i v8, 0
1242 // vmerge.vim v8, v8, -1, v0 (repeated per split)
1243 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
1244 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1245 DstLT.second, CostKind) +
1246 DstLT.first - 1;
1247 }
1248 break;
1249 case ISD::TRUNCATE:
1250 if (Dst->getScalarSizeInBits() == 1) {
1251 // We do not use several vncvt to truncate to mask vector. So we could
1252 // not use PowDiff to calculate it.
1253 // Instead we use the following instructions to truncate to mask vector:
1254 // vand.vi v8, v8, 1
1255 // vmsne.vi v0, v8, 0
1256 return SrcLT.first *
1257 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1258 SrcLT.second, CostKind) +
1259 SrcLT.first - 1;
1260 }
1261 break;
1262 };
1263
1264 // Our actual lowering for the case where a wider legal type is available
1265 // uses promotion to the wider type. This is reflected in the result of
1266 // getTypeLegalizationCost, but BasicTTI assumes the widened cases are
1267 // scalarized if the legalized Src and Dst are not equal sized.
1268 const DataLayout &DL = this->getDataLayout();
// NOTE(review): two lines of the condition below (listing lines 1270 and
// 1272) are missing from this listing.
1269 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1271 SrcLT.second.getSizeInBits()) ||
1273 DstLT.second.getSizeInBits()))
1274 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1275
1276 // The split cost is handled by the base getCastInstrCost
1277 assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1278
// PowDiff is log2(legalized dst element bits) - log2(legalized src element
// bits).
1279 int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
1280 (int)Log2_32(SrcLT.second.getScalarSizeInBits());
1281 switch (ISD) {
1282 case ISD::SIGN_EXTEND:
1283 case ISD::ZERO_EXTEND: {
// Only PowDiff values 1..3 index the vf2/vf4/vf8 opcode tables below; any
// other width ratio is left to BaseT.
1284 if ((PowDiff < 1) || (PowDiff > 3))
1285 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1286 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1287 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1288 unsigned Op =
1289 (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
1290 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
1291 }
1292 case ISD::TRUNCATE:
1293 case ISD::FP_EXTEND:
1294 case ISD::FP_ROUND: {
1295 // Counts of narrow/widen instructions.
1296 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1297 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1298
1299 unsigned Op = (ISD == ISD::TRUNCATE) ? RISCV::VNSRL_WI
1300 : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
1301 : RISCV::VFNCVT_F_F_W;
// NOTE(review): `Cost` is presumably declared on listing line 1302, which is
// missing from this listing.
// Each iteration charges one Op at the current DstEltSize, then halves or
// doubles DstEltSize toward SrcEltSize until the two are equal.
1303 for (; SrcEltSize != DstEltSize;) {
1304 MVT ElementMVT = (ISD == ISD::TRUNCATE)
1305 ? MVT::getIntegerVT(DstEltSize)
1306 : MVT::getFloatingPointVT(DstEltSize);
1307 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1308 DstEltSize =
1309 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1310 Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
1311 }
1312 return Cost;
1313 }
1314 case ISD::FP_TO_SINT:
1315 case ISD::FP_TO_UINT: {
1316 unsigned IsSigned = ISD == ISD::FP_TO_SINT;
1317 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1318 unsigned FWCVT =
1319 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1320 unsigned FNCVT =
1321 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1322 unsigned SrcEltSize = Src->getScalarSizeInBits();
1323 unsigned DstEltSize = Dst->getScalarSizeInBits();
// NOTE(review): `Cost` is presumably declared on listing line 1324, which is
// missing from this listing.
1325 if ((SrcEltSize == 16) &&
1326 (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1327 // If the target only supports zvfhmin or it is fp16-to-i64 conversion
1328 // pre-widening to f32 and then convert f32 to integer
1329 VectorType *VecF32Ty =
1330 VectorType::get(Type::getFloatTy(Dst->getContext()),
1331 cast<VectorType>(Dst)->getElementCount());
1332 std::pair<InstructionCost, MVT> VecF32LT =
1333 getTypeLegalizationCost(VecF32Ty);
1334 Cost +=
1335 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1336 VecF32LT.second, CostKind);
// Recurse for the f32 -> integer half of the conversion.
1337 Cost += getCastInstrCost(Opcode, Dst, VecF32Ty, CCH, CostKind, I);
1338 return Cost;
1339 }
1340 if (DstEltSize == SrcEltSize)
1341 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1342 else if (DstEltSize > SrcEltSize)
1343 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1344 else { // (SrcEltSize > DstEltSize)
1345 // First do a narrowing conversion to an integer half the size, then
1346 // truncate if needed.
1347 MVT ElementVT = MVT::getIntegerVT(SrcEltSize / 2);
1348 MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1349 Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1350 if ((SrcEltSize / 2) > DstEltSize) {
1351 Type *VecTy = EVT(VecVT).getTypeForEVT(Dst->getContext());
1352 Cost +=
1353 getCastInstrCost(Instruction::Trunc, Dst, VecTy, CCH, CostKind, I);
1354 }
1355 }
1356 return Cost;
1357 }
1358 case ISD::SINT_TO_FP:
1359 case ISD::UINT_TO_FP: {
1360 unsigned IsSigned = ISD == ISD::SINT_TO_FP;
1361 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1362 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1363 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1364 unsigned SrcEltSize = Src->getScalarSizeInBits();
1365 unsigned DstEltSize = Dst->getScalarSizeInBits();
1366
// NOTE(review): `Cost` is presumably declared on listing line 1367, which is
// missing from this listing.
1368 if ((DstEltSize == 16) &&
1369 (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
1370 // If the target only supports zvfhmin or it is i64-to-fp16 conversion
1371 // it is converted to f32 and then converted to f16
1372 VectorType *VecF32Ty =
1373 VectorType::get(Type::getFloatTy(Dst->getContext()),
1374 cast<VectorType>(Dst)->getElementCount());
1375 std::pair<InstructionCost, MVT> VecF32LT =
1376 getTypeLegalizationCost(VecF32Ty);
// Recurse for the integer -> f32 half, then add the f32 -> f16 narrowing.
1377 Cost += getCastInstrCost(Opcode, VecF32Ty, Src, CCH, CostKind, I);
1378 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1379 DstLT.second, CostKind);
1380 return Cost;
1381 }
1382
1383 if (DstEltSize == SrcEltSize)
1384 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1385 else if (DstEltSize > SrcEltSize) {
// More than a 2x widening: first extend the integer source to half the
// destination width, then apply one widening convert.
1386 if ((DstEltSize / 2) > SrcEltSize) {
1387 VectorType *VecTy =
1388 VectorType::get(IntegerType::get(Dst->getContext(), DstEltSize / 2),
1389 cast<VectorType>(Dst)->getElementCount());
1390 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1391 Cost += getCastInstrCost(Op, VecTy, Src, CCH, CostKind, I);
1392 }
1393 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1394 } else
1395 Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1396 return Cost;
1397 }
1398 }
1399 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
1400}
1401
1402unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
1403 if (isa<ScalableVectorType>(Ty)) {
1404 const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
1405 const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
1406 const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock;
1407 return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
1408 }
1409 return cast<FixedVectorType>(Ty)->getNumElements();
1410}
1411
// Cost of a min/max reduction over vector type Ty, selected by IID. Fixed
// vectors without RVV fixed-length support and element sizes above ELEN defer
// to BaseT; i1 vectors are re-costed as and/or reductions; everything else is
// costed as an RVV reduction plus a scalar move, with one extra SplitOp per
// additional legalized part.
// NOTE(review): part of the signature (listing lines 1412, 1413 and 1415) is
// not present in this listing.
1414 FastMathFlags FMF,
1416 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1417 return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
1418
1419 // Skip if scalar size of Ty is bigger than ELEN.
1420 if (Ty->getScalarSizeInBits() > ST->getELen())
1421 return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
1422
1423 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1424 if (Ty->getElementType()->isIntegerTy(1)) {
1425 // SelectionDAGBuilder does following transforms:
1426 // vector_reduce_{smin,umax}(<n x i1>) --> vector_reduce_or(<n x i1>)
1427 // vector_reduce_{smax,umin}(<n x i1>) --> vector_reduce_and(<n x i1>)
1428 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1429 return getArithmeticReductionCost(Instruction::Or, Ty, FMF, CostKind);
1430 else
1431 return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
1432 }
1433
1434 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
// NOTE(review): `Opcodes` is presumably declared on listing line 1435, which
// is missing from this listing.
1436 InstructionCost ExtraCost = 0;
1437 switch (IID) {
1438 case Intrinsic::maximum:
1439 if (FMF.noNaNs()) {
1440 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1441 } else {
// Without nnan, the costed sequence adds vmfne.vv + vcpop.m in front of the
// reduction, plus ExtraCost below.
1442 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1443 RISCV::VFMV_F_S};
1444 // Cost of Canonical Nan + branch
1445 // lui a0, 523264
1446 // fmv.w.x fa0, a0
1447 Type *DstTy = Ty->getScalarType();
1448 const unsigned EltTyBits = DstTy->getScalarSizeInBits();
1449 Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
// NOTE(review): the trailing arguments of this getCastInstrCost call (listing
// line 1452) are missing from this listing.
1450 ExtraCost = 1 +
1451 getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
1453 getCFInstrCost(Instruction::Br, CostKind);
1454 }
1455 break;
1456
1457 case Intrinsic::minimum:
1458 if (FMF.noNaNs()) {
1459 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1460 } else {
1461 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1462 RISCV::VFMV_F_S};
1463 // Cost of Canonical Nan + branch
1464 // lui a0, 523264
1465 // fmv.w.x fa0, a0
1466 Type *DstTy = Ty->getScalarType();
// NOTE(review): the maximum case above obtains the same width via
// DstTy->getScalarSizeInBits(); the two spellings could be unified.
1467 const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
1468 Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
// NOTE(review): the trailing arguments of this getCastInstrCost call (listing
// line 1471) are missing from this listing.
1469 ExtraCost = 1 +
1470 getCastInstrCost(Instruction::UIToFP, DstTy, SrcTy,
1472 getCFInstrCost(Instruction::Br, CostKind);
1473 }
1474 break;
1475 }
1476 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1477 }
1478
1479 // IR Reduction is composed by one rvv reduction instruction and vmv
1480 unsigned SplitOp;
// NOTE(review): `Opcodes` is presumably declared on listing line 1481, which
// is missing from this listing.
1482 switch (IID) {
1483 default:
1484 llvm_unreachable("Unsupported intrinsic");
1485 case Intrinsic::smax:
1486 SplitOp = RISCV::VMAX_VV;
1487 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1488 break;
1489 case Intrinsic::smin:
1490 SplitOp = RISCV::VMIN_VV;
1491 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1492 break;
1493 case Intrinsic::umax:
1494 SplitOp = RISCV::VMAXU_VV;
1495 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1496 break;
1497 case Intrinsic::umin:
1498 SplitOp = RISCV::VMINU_VV;
1499 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1500 break;
1501 case Intrinsic::maxnum:
1502 SplitOp = RISCV::VFMAX_VV;
1503 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1504 break;
1505 case Intrinsic::minnum:
1506 SplitOp = RISCV::VFMIN_VV;
1507 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1508 break;
1509 }
1510 // Add a cost for data larger than LMUL8
// One SplitOp per legalized part beyond the first combines the parts before
// the single reduction.
1511 InstructionCost SplitCost =
1512 (LT.first > 1) ? (LT.first - 1) *
1513 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1514 : 0;
1515 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1516}
1517
// Cost of an arithmetic reduction (add/or/xor/and/fadd) over vector type Ty.
// Fixed vectors without RVV fixed-length support, element sizes above ELEN,
// and any other opcode defer to BaseT. i1 vectors are costed as mask-register
// sequences ending in vcpop.m; other element types are costed as an RVV
// reduction with scalar moves, plus one SplitOp per additional legalized part.
// NOTE(review): part of the signature (listing lines 1518, 1519 and 1521) is
// not present in this listing.
1520 std::optional<FastMathFlags> FMF,
1522 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1523 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1524
1525 // Skip if scalar size of Ty is bigger than ELEN.
1526 if (Ty->getScalarSizeInBits() > ST->getELen())
1527 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1528
1529 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1530 assert(ISD && "Invalid opcode");
1531
1532 if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
1533 ISD != ISD::FADD)
1534 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
1535
1536 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1537 Type *ElementTy = Ty->getElementType();
1538 if (ElementTy->isIntegerTy(1)) {
1539 if (ISD == ISD::AND) {
1540 // Example sequences:
1541 // vsetvli a0, zero, e8, mf8, ta, ma
1542 // vmand.mm v8, v9, v8 ; needed every time type is split
1543 // vmnot.m v8, v0
1544 // vcpop.m a0, v8
1545 // seqz a0, a0
// NOTE(review): the trailing arguments of the getCmpSelInstrCost call below
// (listing line 1550) are missing from this listing.
1546 return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
1547 CostKind) +
1548 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1549 getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
1551 } else if (ISD == ISD::XOR) {
1552 // Example sequences:
1553 // vsetvli a0, zero, e8, mf8, ta, ma
1554 // vmxor.mm v8, v0, v8 ; needed every time type is split
1555 // vcpop.m a0, v8
1556 // andi a0, a0, 1
1557 return (LT.first - 1) *
1558 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
1559 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
1560 } else {
// OR reduction: split parts are combined with vmor.mm (not vmxor.mm), then
// vcpop.m + snez tests for any set bit.
// NOTE(review): an i1 ISD::ADD reduction also reaches this branch and is
// costed like OR -- confirm whether it should follow the XOR sequence
// instead.
1561 // Example sequences:
1562 // vsetvli a0, zero, e8, mf8, ta, ma
1563 // vmor.mm v8, v9, v8 ; needed every time type is split
1564 // vcpop.m a0, v0
1565 // snez a0, a0
// NOTE(review): the trailing arguments of the getCmpSelInstrCost call below
// (listing line 1570) are missing from this listing.
1566 return (LT.first - 1) *
1567 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
1568 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1569 getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
1571 }
1572 }
1573
1574 // An IR reduction of or/and is composed of one vmv and one rvv reduction
1575 // instruction; the others are composed of two vmv and one rvv reduction
1576 // instruction.
1577 unsigned SplitOp;
// NOTE(review): `Opcodes` is presumably declared on listing line 1578, which
// is missing from this listing.
1579 switch (ISD) {
1580 case ISD::ADD:
1581 SplitOp = RISCV::VADD_VV;
1582 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1583 break;
1584 case ISD::OR:
1585 SplitOp = RISCV::VOR_VV;
1586 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
1587 break;
1588 case ISD::XOR:
1589 SplitOp = RISCV::VXOR_VV;
1590 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1591 break;
1592 case ISD::AND:
1593 SplitOp = RISCV::VAND_VV;
1594 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
1595 break;
1596 case ISD::FADD:
1597 // We can't promote f16/bf16 fadd reductions.
1598 if ((LT.second.getVectorElementType() == MVT::f16 &&
1599 !ST->hasVInstructionsF16()) ||
1600 LT.second.getVectorElementType() == MVT::bf16)
1601 return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
// NOTE(review): the statement opening the block closed on listing line 1608
// (listing line 1602) is missing from this listing; the block below costs
// one vfredosum.vs per legalized part between the two scalar moves.
1603 Opcodes.push_back(RISCV::VFMV_S_F);
1604 for (unsigned i = 0; i < LT.first.getValue(); i++)
1605 Opcodes.push_back(RISCV::VFREDOSUM_VS);
1606 Opcodes.push_back(RISCV::VFMV_F_S);
1607 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1608 }
1609 SplitOp = RISCV::VFADD_VV;
1610 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1611 break;
1612 }
1613 // Add a cost for data larger than LMUL8
1614 InstructionCost SplitCost =
1615 (LT.first > 1) ? (LT.first - 1) *
1616 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1617 : 0;
1618 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1619}
1620
// Cost of a reduction whose input ValTy is extended to ResTy. Only Add/FAdd
// are costed here; fixed vectors without RVV fixed-length support, result
// element sizes above ELEN, and results that are not exactly twice the
// legalized input element width defer to BaseT.
// NOTE(review): part of the signature (listing lines 1621 and 1623) is not
// present in this listing.
1622 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1624 if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
1625 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1626 FMF, CostKind);
1627
1628 // Skip if scalar size of ResTy is bigger than ELEN.
1629 if (ResTy->getScalarSizeInBits() > ST->getELen())
1630 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1631 FMF, CostKind);
1632
1633 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1634 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1635 FMF, CostKind);
1636
1637 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
1638
1639 if (IsUnsigned && Opcode == Instruction::Add &&
1640 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
1641 // Represent vector_reduce_add(ZExt(<n x i1>)) as
1642 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
1643 return LT.first *
1644 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
1645 }
1646
1647 if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
1648 return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
1649 FMF, CostKind);
1650
// Remaining case: cost of the plain reduction on ValTy plus one unit per
// additional legalized part.
1651 return (LT.first - 1) +
1652 getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
1653}
1654
// Cost of materializing a constant operand before it is stored: 0 for
// non-vector types, 1 for a uniform vector constant, otherwise the cost of a
// constant pool load of Ty. OpInfo must describe a constant (asserted).
// NOTE(review): part of the signature (listing lines 1655 and 1657) is not
// present in this listing.
1656 TTI::OperandValueInfo OpInfo,
1658 assert(OpInfo.isConstant() && "non constant operand?");
1659 if (!isa<VectorType>(Ty))
1660 // FIXME: We need to account for immediate materialization here, but doing
1661 // a decent job requires more knowledge about the immediate than we
1662 // currently have here.
1663 return 0;
1664
1665 if (OpInfo.isUniform())
1666 // vmv.v.i, vmv.v.x, or vfmv.v.f
1667 // We ignore the cost of the scalar constant materialization to be consistent
1668 // with how we treat scalar constants themselves just above.
1669 return 1;
1670
1671 return getConstantPoolLoadCost(Ty, CostKind);
1672}
1673
1674
// Cost of a load/store of type Src. Types that map to MVT::Other defer to
// BaseT. Otherwise the result is the store-immediate cost (constant stores
// only) plus a base cost that, for vector types outside code-size costing,
// is scaled by the LMUL cost of the legalized type.
// NOTE(review): part of the signature (listing lines 1675 and 1678) is not
// present in this listing.
1676 MaybeAlign Alignment,
1677 unsigned AddressSpace,
1679 TTI::OperandValueInfo OpInfo,
1680 const Instruction *I) {
1681 EVT VT = TLI->getValueType(DL, Src, true);
1682 // Type legalization can't handle structs
1683 if (VT == MVT::Other)
1684 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
1685 CostKind, OpInfo, I);
1686
// NOTE(review): `Cost` is presumably declared on listing line 1687, which is
// missing from this listing.
1688 if (Opcode == Instruction::Store && OpInfo.isConstant())
1689 Cost += getStoreImmCost(Src, OpInfo, CostKind);
1690
1691 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
1692
// The lambda's local `Cost` shadows the outer `Cost`; it starts at the number
// of legalized parts.
1693 InstructionCost BaseCost = [&]() {
1694 InstructionCost Cost = LT.first;
// NOTE(review): the condition guarding this early return (listing line 1695)
// is missing from this listing.
1696 return Cost;
1697
1698 // Our actual lowering for the case where a wider legal type is available
1699 // uses a VL predicated load on the wider type. This is reflected in
1700 // the result of getTypeLegalizationCost, but BasicTTI assumes the
1701 // widened cases are scalarized.
1702 const DataLayout &DL = this->getDataLayout();
// NOTE(review): one line of the condition below (listing line 1704) is
// missing from this listing.
1703 if (Src->isVectorTy() && LT.second.isVector() &&
1705 LT.second.getSizeInBits()))
1706 return Cost;
1707
1708 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
1709 CostKind, OpInfo, I);
1710 }();
1711
1712 // Assume memory ops cost scale with the number of vector registers
1713 // possible accessed by the instruction. Note that BasicTTI already
1714 // handles the LT.first term for us.
1715 if (LT.second.isVector() && CostKind != TTI::TCK_CodeSize)
1716 BaseCost *= TLI->getLMULCost(LT.second);
1717 return Cost + BaseCost;
1718
1719}
1720
// Cost of a compare or select. Vector selects, vector integer compares and
// vector FP compares are costed from RVV instruction sequences, each scaled
// by the number of legalized parts and increased by the cost of
// materializing any non-uniform constant operands. Everything else, and
// unsupported vector cases, defer to BaseT.
// NOTE(review): part of the signature (listing lines 1721 and 1723) is not
// present in this listing.
1722 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
1724 TTI::OperandValueInfo Op2Info, const Instruction *I) {
// NOTE(review): the condition guarding this early return (listing line 1725)
// is missing from this listing.
1726 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1727 Op1Info, Op2Info, I);
1728
1729 if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
1730 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1731 Op1Info, Op2Info, I);
1732
1733 // Skip if scalar size of ValTy is bigger than ELEN.
1734 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
1735 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1736 Op1Info, Op2Info, I);
1737
// Cost of materializing one constant operand: free when uniform, otherwise a
// constant pool load of ValTy.
1738 auto GetConstantMatCost =
1739 [&](TTI::OperandValueInfo OpInfo) -> InstructionCost {
1740 if (OpInfo.isUniform())
1741 // Return 0: we currently ignore the cost of materializing scalar
1742 // constants in GPRs.
1743 return 0;
1744
1745 return getConstantPoolLoadCost(ValTy, CostKind);
1746 };
1747
1748 InstructionCost ConstantMatCost;
1749 if (Op1Info.isConstant())
1750 ConstantMatCost += GetConstantMatCost(Op1Info);
1751 if (Op2Info.isConstant())
1752 ConstantMatCost += GetConstantMatCost(Op2Info);
1753
1754 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
1755 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
// Vector condition: no splat of the condition is needed.
1756 if (CondTy->isVectorTy()) {
1757 if (ValTy->getScalarSizeInBits() == 1) {
1758 // vmandn.mm v8, v8, v9
1759 // vmand.mm v9, v0, v9
1760 // vmor.mm v0, v9, v8
1761 return ConstantMatCost +
1762 LT.first *
1763 getRISCVInstructionCost(
1764 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1765 LT.second, CostKind);
1766 }
1767 // vselect and max/min are supported natively.
1768 return ConstantMatCost +
1769 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
1770 CostKind);
1771 }
1772
// Scalar condition: the sequences below first splat it and compare against
// zero to form a mask.
1773 if (ValTy->getScalarSizeInBits() == 1) {
1774 // vmv.v.x v9, a0
1775 // vmsne.vi v9, v9, 0
1776 // vmandn.mm v8, v8, v9
1777 // vmand.mm v9, v0, v9
1778 // vmor.mm v0, v9, v8
// The splat and compare are costed on an i8-element vector of the same shape.
1779 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
1780 return ConstantMatCost +
1781 LT.first *
1782 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
1783 InterimVT, CostKind) +
1784 LT.first * getRISCVInstructionCost(
1785 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1786 LT.second, CostKind);
1787 }
1788
1789 // vmv.v.x v10, a0
1790 // vmsne.vi v0, v10, 0
1791 // vmerge.vvm v8, v9, v8, v0
1792 return ConstantMatCost +
1793 LT.first * getRISCVInstructionCost(
1794 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
1795 LT.second, CostKind);
1796 }
1797
1798 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
1799 CmpInst::isIntPredicate(VecPred)) {
1800 // Use VMSLT_VV to represent VMSEQ, VMSNE, VMSLTU, VMSLEU, VMSLT, VMSLE
1801 // provided they incur the same cost across all implementations
1802 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
1803 LT.second,
1804 CostKind);
1805 }
1806
1807 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
1808 CmpInst::isFPPredicate(VecPred)) {
1809
1810 // Use VMXOR_MM and VMXNOR_MM to generate all true/false mask
1811 if ((VecPred == CmpInst::FCMP_FALSE) || (VecPred == CmpInst::FCMP_TRUE))
1812 return ConstantMatCost +
1813 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
1814
1815 // If we do not support the input floating point vector type, use the base
1816 // one which will calculate as:
1817 // ScalarizeCost + Num * Cost for fixed vector,
1818 // InvalidCost for scalable vector.
1819 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
1820 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
1821 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
1822 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1823 Op1Info, Op2Info, I);
1824
1825 // Assuming vector fp compare and mask instructions are all the same cost
1826 // until a need arises to differentiate them.
// The trailing comment on each case names the real sequence; the opcodes
// passed to getRISCVInstructionCost are same-cost stand-ins per the
// assumption above.
1827 switch (VecPred) {
1828 case CmpInst::FCMP_ONE: // vmflt.vv + vmflt.vv + vmor.mm
1829 case CmpInst::FCMP_ORD: // vmfeq.vv + vmfeq.vv + vmand.mm
1830 case CmpInst::FCMP_UNO: // vmfne.vv + vmfne.vv + vmor.mm
1831 case CmpInst::FCMP_UEQ: // vmflt.vv + vmflt.vv + vmnor.mm
1832 return ConstantMatCost +
1833 LT.first * getRISCVInstructionCost(
1834 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
1835 LT.second, CostKind);
1836
1837 case CmpInst::FCMP_UGT: // vmfle.vv + vmnot.m
1838 case CmpInst::FCMP_UGE: // vmflt.vv + vmnot.m
1839 case CmpInst::FCMP_ULT: // vmfle.vv + vmnot.m
1840 case CmpInst::FCMP_ULE: // vmflt.vv + vmnot.m
1841 return ConstantMatCost +
1842 LT.first *
1843 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
1844 LT.second, CostKind);
1845
1846 case CmpInst::FCMP_OEQ: // vmfeq.vv
1847 case CmpInst::FCMP_OGT: // vmflt.vv
1848 case CmpInst::FCMP_OGE: // vmfle.vv
1849 case CmpInst::FCMP_OLT: // vmflt.vv
1850 case CmpInst::FCMP_OLE: // vmfle.vv
1851 case CmpInst::FCMP_UNE: // vmfne.vv
1852 return ConstantMatCost +
1853 LT.first *
1854 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
1855 default:
1856 break;
1857 }
1858 }
1859
1860 // With ShortForwardBranchOpt or ConditionalMoveFusion, scalar icmp + select
1861 // instructions will lower to SELECT_CC and lower to PseudoCCMOVGPR which will
1862 // generate a conditional branch + mv. The cost of scalar (icmp + select) will
1863 // be (0 + select instr cost).
// The icmp is free only if every user is an integer select on this compare
// whose two value operands are both non-constant.
1864 if (ST->hasConditionalMoveFusion() && I && isa<ICmpInst>(I) &&
1865 ValTy->isIntegerTy() && !I->user_empty()) {
1866 if (all_of(I->users(), [&](const User *U) {
1867 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
1868 U->getType()->isIntegerTy() &&
1869 !isa<ConstantData>(U->getOperand(1)) &&
1870 !isa<ConstantData>(U->getOperand(2));
1871 }))
1872 return 0;
1873 }
1874
1875 // TODO: Add cost for scalar type.
1876
1877 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
1878 Op1Info, Op2Info, I);
1879}
1880
1883 const Instruction *I) {
1885 return Opcode == Instruction::PHI ? 0 : 1;
1886 // Branches are assumed to be predicted.
1887 return 0;
1888}
1889
1892 unsigned Index, Value *Op0,
1893 Value *Op1) {
1894 assert(Val->isVectorTy() && "This must be a vector type");
1895
1896 if (Opcode != Instruction::ExtractElement &&
1897 Opcode != Instruction::InsertElement)
1898 return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
1899
1900 // Legalize the type.
1901 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);
1902
1903 // This type is legalized to a scalar type.
1904 if (!LT.second.isVector()) {
1905 auto *FixedVecTy = cast<FixedVectorType>(Val);
1906 // If Index is a known constant, cost is zero.
1907 if (Index != -1U)
1908 return 0;
1909 // Extract/InsertElement with non-constant index is very costly when
1910 // scalarized; estimate cost of loads/stores sequence via the stack:
1911 // ExtractElement cost: store vector to stack, load scalar;
1912 // InsertElement cost: store vector to stack, store scalar, load vector.
1913 Type *ElemTy = FixedVecTy->getElementType();
1914 auto NumElems = FixedVecTy->getNumElements();
1915 auto Align = DL.getPrefTypeAlign(ElemTy);
1916 InstructionCost LoadCost =
1917 getMemoryOpCost(Instruction::Load, ElemTy, Align, 0, CostKind);
1918 InstructionCost StoreCost =
1919 getMemoryOpCost(Instruction::Store, ElemTy, Align, 0, CostKind);
1920 return Opcode == Instruction::ExtractElement
1921 ? StoreCost * NumElems + LoadCost
1922 : (StoreCost + LoadCost) * NumElems + StoreCost;
1923 }
1924
1925 // For unsupported scalable vector.
1926 if (LT.second.isScalableVector() && !LT.first.isValid())
1927 return LT.first;
1928
1929 // Mask vector extract/insert is expanded via e8.
1930 if (Val->getScalarSizeInBits() == 1) {
1931 VectorType *WideTy =
1933 cast<VectorType>(Val)->getElementCount());
1934 if (Opcode == Instruction::ExtractElement) {
1935 InstructionCost ExtendCost
1936 = getCastInstrCost(Instruction::ZExt, WideTy, Val,
1938 InstructionCost ExtractCost
1939 = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
1940 return ExtendCost + ExtractCost;
1941 }
1942 InstructionCost ExtendCost
1943 = getCastInstrCost(Instruction::ZExt, WideTy, Val,
1945 InstructionCost InsertCost
1946 = getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
1947 InstructionCost TruncCost
1948 = getCastInstrCost(Instruction::Trunc, Val, WideTy,
1950 return ExtendCost + InsertCost + TruncCost;
1951 }
1952
1953
1954 // In RVV, we could use vslidedown + vmv.x.s to extract element from vector
1955 // and vslideup + vmv.s.x to insert element to vector.
1956 unsigned BaseCost = 1;
1957 // When insertelement we should add the index with 1 as the input of vslideup.
1958 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
1959
1960 if (Index != -1U) {
1961 // The type may be split. For fixed-width vectors we can normalize the
1962 // index to the new type.
1963 if (LT.second.isFixedLengthVector()) {
1964 unsigned Width = LT.second.getVectorNumElements();
1965 Index = Index % Width;
1966 }
1967
1968 // If exact VLEN is known, we will insert/extract into the appropriate
1969 // subvector with no additional subvector insert/extract cost.
1970 if (auto VLEN = ST->getRealVLen()) {
1971 unsigned EltSize = LT.second.getScalarSizeInBits();
1972 unsigned M1Max = *VLEN / EltSize;
1973 Index = Index % M1Max;
1974 }
1975
1976 // We could extract/insert the first element without vslidedown/vslideup.
1977 if (Index == 0)
1978 SlideCost = 0;
1979 else if (Opcode == Instruction::InsertElement)
1980 SlideCost = 1; // With a constant index, we do not need to use addi.
1981 }
1982
1983 // When the vector needs to split into multiple register groups and the index
1984 // exceeds single vector register group, we need to insert/extract the element
1985 // via stack.
1986 if (LT.first > 1 &&
1987 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
1988 LT.second.isScalableVector()))) {
1989 Type *ScalarType = Val->getScalarType();
1990 Align VecAlign = DL.getPrefTypeAlign(Val);
1991 Align SclAlign = DL.getPrefTypeAlign(ScalarType);
1992 // Extra addi for unknown index.
1993 InstructionCost IdxCost = Index == -1U ? 1 : 0;
1994
1995 // Store all split vectors into stack and load the target element.
1996 if (Opcode == Instruction::ExtractElement)
1997 return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
1998 getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
1999 CostKind) +
2000 IdxCost;
2001
2002 // Store all split vectors into stack and store the target element and load
2003 // vectors back.
2004 return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
2005 getMemoryOpCost(Instruction::Load, Val, VecAlign, 0, CostKind) +
2006 getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
2007 CostKind) +
2008 IdxCost;
2009 }
2010
2011 // Extract i64 in the target that has XLEN=32 need more instruction.
2012 if (Val->getScalarType()->isIntegerTy() &&
2013 ST->getXLen() < Val->getScalarSizeInBits()) {
2014 // For extractelement, we need the following instructions:
2015 // vsetivli zero, 1, e64, m1, ta, mu (not count)
2016 // vslidedown.vx v8, v8, a0
2017 // vmv.x.s a0, v8
2018 // li a1, 32
2019 // vsrl.vx v8, v8, a1
2020 // vmv.x.s a1, v8
2021
2022 // For insertelement, we need the following instructions:
2023 // vsetivli zero, 2, e32, m4, ta, mu (not count)
2024 // vmv.v.i v12, 0
2025 // vslide1up.vx v16, v12, a1
2026 // vslide1up.vx v12, v16, a0
2027 // addi a0, a2, 1
2028 // vsetvli zero, a0, e64, m4, tu, mu (not count)
2029 // vslideup.vx v8, v12, a2
2030
2031 // TODO: should we count these special vsetvlis?
2032 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2033 }
2034 return BaseCost + SlideCost;
2035}
2036
2038 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2040 ArrayRef<const Value *> Args, const Instruction *CxtI) {
2041
2042 // TODO: Handle more cost kinds.
2044 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
2045 Args, CxtI);
2046
2047 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
2048 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
2049 Args, CxtI);
2050
2051 // Skip if scalar size of Ty is bigger than ELEN.
2052 if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
2053 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
2054 Args, CxtI);
2055
2056 // Legalize the type.
2057 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
2058
2059 // TODO: Handle scalar type.
2060 if (!LT.second.isVector())
2061 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
2062 Args, CxtI);
2063
2064 // f16 with zvfhmin and bf16 will be promoted to f32.
2065 // FIXME: nxv32[b]f16 will be custom lowered and split.
2066 unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
2067 InstructionCost CastCost = 0;
2068 if ((LT.second.getVectorElementType() == MVT::f16 ||
2069 LT.second.getVectorElementType() == MVT::bf16) &&
2070 TLI->getOperationAction(ISDOpcode, LT.second) ==
2072 MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
2073 Type *PromotedTy = EVT(PromotedVT).getTypeForEVT(Ty->getContext());
2074 Type *LegalTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
2075 // Add cost of extending arguments
2076 CastCost += LT.first * Args.size() *
2077 getCastInstrCost(Instruction::FPExt, PromotedTy, LegalTy,
2079 // Add cost of truncating result
2080 CastCost +=
2081 LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy,
2083 // Compute cost of op in promoted type
2084 LT.second = PromotedVT;
2085 }
2086
2087 auto getConstantMatCost =
2088 [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
2089 if (OpInfo.isUniform() && canSplatOperand(Opcode, Operand))
2090 // Two sub-cases:
2091 // * Has a 5 bit immediate operand which can be splatted.
2092 // * Has a larger immediate which must be materialized in scalar register
2093 // We return 0 for both as we currently ignore the cost of materializing
2094 // scalar constants in GPRs.
2095 return 0;
2096
2097 return getConstantPoolLoadCost(Ty, CostKind);
2098 };
2099
2100 // Add the cost of materializing any constant vectors required.
2101 InstructionCost ConstantMatCost = 0;
2102 if (Op1Info.isConstant())
2103 ConstantMatCost += getConstantMatCost(0, Op1Info);
2104 if (Op2Info.isConstant())
2105 ConstantMatCost += getConstantMatCost(1, Op2Info);
2106
2107 unsigned Op;
2108 switch (ISDOpcode) {
2109 case ISD::ADD:
2110 case ISD::SUB:
2111 Op = RISCV::VADD_VV;
2112 break;
2113 case ISD::SHL:
2114 case ISD::SRL:
2115 case ISD::SRA:
2116 Op = RISCV::VSLL_VV;
2117 break;
2118 case ISD::AND:
2119 case ISD::OR:
2120 case ISD::XOR:
2121 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
2122 break;
2123 case ISD::MUL:
2124 case ISD::MULHS:
2125 case ISD::MULHU:
2126 Op = RISCV::VMUL_VV;
2127 break;
2128 case ISD::SDIV:
2129 case ISD::UDIV:
2130 Op = RISCV::VDIV_VV;
2131 break;
2132 case ISD::SREM:
2133 case ISD::UREM:
2134 Op = RISCV::VREM_VV;
2135 break;
2136 case ISD::FADD:
2137 case ISD::FSUB:
2138 Op = RISCV::VFADD_VV;
2139 break;
2140 case ISD::FMUL:
2141 Op = RISCV::VFMUL_VV;
2142 break;
2143 case ISD::FDIV:
2144 Op = RISCV::VFDIV_VV;
2145 break;
2146 case ISD::FNEG:
2147 Op = RISCV::VFSGNJN_VV;
2148 break;
2149 default:
2150 // Assuming all other instructions have the same cost until a need arises to
2151 // differentiate them.
2152 return CastCost + ConstantMatCost +
2153 BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
2154 Args, CxtI);
2155 }
2156
2157 InstructionCost InstrCost = getRISCVInstructionCost(Op, LT.second, CostKind);
2158 // We use BasicTTIImpl to calculate scalar costs, which assumes floating point
2159 // ops are twice as expensive as integer ops. Do the same for vectors so
2160 // scalar floating point ops aren't cheaper than their vector equivalents.
2161 if (Ty->isFPOrFPVectorTy())
2162 InstrCost *= 2;
2163 return CastCost + ConstantMatCost + LT.first * InstrCost;
2164}
2165
2166// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
2168 ArrayRef<const Value *> Ptrs, const Value *Base,
2169 const TTI::PointersChainInfo &Info, Type *AccessTy,
2172 // In the basic model we take into account GEP instructions only
2173 // (although here can come alloca instruction, a value, constants and/or
2174 // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a
2175 // pointer). Typically, if Base is not a GEP instruction and all the
2176 // pointers are relative to the same base address, all the rest are
2177 // either GEP instructions, PHIs, bitcasts or constants. When we have the same
2178 // base, we just calculate the cost of each non-Base GEP as an ADD operation if
2179 // any of its indices is non-constant.
2180 // If there are no known dependencies between the pointers, the cost is
2181 // calculated as a sum of the costs of the GEP instructions.
2182 for (auto [I, V] : enumerate(Ptrs)) {
2183 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
2184 if (!GEP)
2185 continue;
2186 if (Info.isSameBase() && V != Base) {
2187 if (GEP->hasAllConstantIndices())
2188 continue;
2189 // If the chain is unit-stride and BaseReg + stride*i is a legal
2190 // addressing mode, then presume the base GEP is sitting around in a
2191 // register somewhere and check if we can fold the offset relative to
2192 // it.
2193 unsigned Stride = DL.getTypeStoreSize(AccessTy);
2194 if (Info.isUnitStride() &&
2195 isLegalAddressingMode(AccessTy,
2196 /* BaseGV */ nullptr,
2197 /* BaseOffset */ Stride * I,
2198 /* HasBaseReg */ true,
2199 /* Scale */ 0,
2200 GEP->getType()->getPointerAddressSpace()))
2201 continue;
2202 Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
2203 {TTI::OK_AnyValue, TTI::OP_None},
2204 {TTI::OK_AnyValue, TTI::OP_None}, {});
2205 } else {
2206 SmallVector<const Value *> Indices(GEP->indices());
2207 Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
2208 Indices, AccessTy, CostKind);
2209 }
2210 }
2211 return Cost;
2212}
2213
2217 // TODO: More tuning on benchmarks and metrics with changes as needed
2218 // would apply to all settings below to enable performance.
2219
2220
2221 if (ST->enableDefaultUnroll())
2222 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
2223
2224 // Enable Upper bound unrolling universally, not dependent upon the conditions
2225 // below.
2226 UP.UpperBound = true;
2227
2228 // Disable loop unrolling for Oz and Os.
2229 UP.OptSizeThreshold = 0;
2231 if (L->getHeader()->getParent()->hasOptSize())
2232 return;
2233
2234 SmallVector<BasicBlock *, 4> ExitingBlocks;
2235 L->getExitingBlocks(ExitingBlocks);
2236 LLVM_DEBUG(dbgs() << "Loop has:\n"
2237 << "Blocks: " << L->getNumBlocks() << "\n"
2238 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2239
2240 // Only allow another exit other than the latch. This acts as an early exit
2241 // as it mirrors the profitability calculation of the runtime unroller.
2242 if (ExitingBlocks.size() > 2)
2243 return;
2244
2245 // Limit the CFG of the loop body for targets with a branch predictor.
2246 // Allowing 4 blocks permits if-then-else diamonds in the body.
2247 if (L->getNumBlocks() > 4)
2248 return;
2249
2250 // Don't unroll vectorized loops, including the remainder loop
2251 if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
2252 return;
2253
2254 // Scan the loop: don't unroll loops with calls as this could prevent
2255 // inlining.
2257 for (auto *BB : L->getBlocks()) {
2258 for (auto &I : *BB) {
2259 // Initial setting - Don't unroll loops containing vectorized
2260 // instructions.
2261 if (I.getType()->isVectorTy())
2262 return;
2263
2264 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
2265 if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
2266 if (!isLoweredToCall(F))
2267 continue;
2268 }
2269 return;
2270 }
2271
2272 SmallVector<const Value *> Operands(I.operand_values());
2275 }
2276 }
2277
2278 LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
2279
2280 UP.Partial = true;
2281 UP.Runtime = true;
2282 UP.UnrollRemainder = true;
2283 UP.UnrollAndJam = true;
2284
2285 // Force unrolling small loops can be very useful because of the branch
2286 // taken cost of the backedge.
2287 if (Cost < 12)
2288 UP.Force = true;
2289}
2290
2294}
2295
2297 if (Ty->isVectorTy()) {
2298 // f16 with only zvfhmin and bf16 will be promoted to f32
2299 Type *EltTy = cast<VectorType>(Ty)->getElementType();
2300 if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
2301 EltTy->isBFloatTy())
2303 cast<VectorType>(Ty));
2304
2306 if (Size.isScalable() && ST->hasVInstructions())
2307 return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
2308
2310 return divideCeil(Size, ST->getRealMinVLen());
2311 }
2312
2313 return BaseT::getRegUsageForType(Ty);
2314}
2315
2316unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
2317 if (SLPMaxVF.getNumOccurrences())
2318 return SLPMaxVF;
2319
2320 // Return how many elements can fit in getRegisterBitwidth. This is the
2321 // same routine as used in LoopVectorizer. We should probably be
2322 // accounting for whether we actually have instructions with the right
2323 // lane type, but we don't have enough information to do that without
2324 // some additional plumbing which hasn't been justified yet.
2325 TypeSize RegWidth =
2327 // If no vector registers, or absurd element widths, disable
2328 // vectorization by returning 1.
2329 return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
2330}
2331
2333 const TargetTransformInfo::LSRCost &C2) {
2334 // RISC-V specific here are "instruction number 1st priority".
2335 // If we need to emit adds inside the loop to add up base registers, then
2336 // we need at least one extra temporary register.
2337 unsigned C1NumRegs = C1.NumRegs + (C1.NumBaseAdds != 0);
2338 unsigned C2NumRegs = C2.NumRegs + (C2.NumBaseAdds != 0);
2339 return std::tie(C1.Insns, C1NumRegs, C1.AddRecCost,
2340 C1.NumIVMuls, C1.NumBaseAdds,
2341 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
2342 std::tie(C2.Insns, C2NumRegs, C2.AddRecCost,
2343 C2.NumIVMuls, C2.NumBaseAdds,
2344 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
2345}
2346
2348 auto *VTy = dyn_cast<VectorType>(DataTy);
2349 if (!VTy || VTy->isScalableTy())
2350 return false;
2351
2352 if (!isLegalMaskedLoadStore(DataTy, Alignment))
2353 return false;
2354
2355 // FIXME: If it is an i8 vector and the element count exceeds 256, we should
2356 // scalarize these types with LMUL >= maximum fixed-length LMUL.
2357 if (VTy->getElementType()->isIntegerTy(8))
2358 if (VTy->getElementCount().getFixedValue() > 256)
2359 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
2361 return true;
2362}
2363
2365 auto *VTy = dyn_cast<VectorType>(DataTy);
2366 if (!VTy || VTy->isScalableTy())
2367 return false;
2368
2369 if (!isLegalMaskedLoadStore(DataTy, Alignment))
2370 return false;
2371 return true;
2372}
2373
2374/// See if \p I should be considered for address type promotion. We check if \p
2375/// I is a sext with the right type and used in memory accesses. If it is used in a
2376/// "complex" getelementptr, we allow it to be promoted without finding other
2377/// sext instructions that sign extended the same initial value. A getelementptr
2378/// is considered as "complex" if it has more than 2 operands.
2380 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
2381 bool Considerable = false;
2382 AllowPromotionWithoutCommonHeader = false;
2383 if (!isa<SExtInst>(&I))
2384 return false;
2385 Type *ConsideredSExtType =
2386 Type::getInt64Ty(I.getParent()->getParent()->getContext());
2387 if (I.getType() != ConsideredSExtType)
2388 return false;
2389 // See if the sext is the one with the right type and used in at least one
2390 // GetElementPtrInst.
2391 for (const User *U : I.users()) {
2392 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
2393 Considerable = true;
2394 // A getelementptr is considered as "complex" if it has more than 2
2395 // operands. We will promote a SExt used in such complex GEP as we
2396 // expect some computation to be merged if they are done on 64 bits.
2397 if (GEPInst->getNumOperands() > 2) {
2398 AllowPromotionWithoutCommonHeader = true;
2399 break;
2400 }
2401 }
2402 }
2403 return Considerable;
2404}
2405
2406bool RISCVTTIImpl::canSplatOperand(unsigned Opcode, int Operand) const {
2407 switch (Opcode) {
2408 case Instruction::Add:
2409 case Instruction::Sub:
2410 case Instruction::Mul:
2411 case Instruction::And:
2412 case Instruction::Or:
2413 case Instruction::Xor:
2414 case Instruction::FAdd:
2415 case Instruction::FSub:
2416 case Instruction::FMul:
2417 case Instruction::FDiv:
2418 case Instruction::ICmp:
2419 case Instruction::FCmp:
2420 return true;
2421 case Instruction::Shl:
2422 case Instruction::LShr:
2423 case Instruction::AShr:
2424 case Instruction::UDiv:
2425 case Instruction::SDiv:
2426 case Instruction::URem:
2427 case Instruction::SRem:
2428 case Instruction::Select:
2429 return Operand == 1;
2430 default:
2431 return false;
2432 }
2433}
2434
2436 if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
2437 return false;
2438
2439 if (canSplatOperand(I->getOpcode(), Operand))
2440 return true;
2441
2442 auto *II = dyn_cast<IntrinsicInst>(I);
2443 if (!II)
2444 return false;
2445
2446 switch (II->getIntrinsicID()) {
2447 case Intrinsic::fma:
2448 case Intrinsic::vp_fma:
2449 case Intrinsic::fmuladd:
2450 case Intrinsic::vp_fmuladd:
2451 return Operand == 0 || Operand == 1;
2452 case Intrinsic::vp_shl:
2453 case Intrinsic::vp_lshr:
2454 case Intrinsic::vp_ashr:
2455 case Intrinsic::vp_udiv:
2456 case Intrinsic::vp_sdiv:
2457 case Intrinsic::vp_urem:
2458 case Intrinsic::vp_srem:
2459 case Intrinsic::ssub_sat:
2460 case Intrinsic::vp_ssub_sat:
2461 case Intrinsic::usub_sat:
2462 case Intrinsic::vp_usub_sat:
2463 case Intrinsic::vp_select:
2464 return Operand == 1;
2465 // These intrinsics are commutative.
2466 case Intrinsic::vp_add:
2467 case Intrinsic::vp_mul:
2468 case Intrinsic::vp_and:
2469 case Intrinsic::vp_or:
2470 case Intrinsic::vp_xor:
2471 case Intrinsic::vp_fadd:
2472 case Intrinsic::vp_fmul:
2473 case Intrinsic::vp_icmp:
2474 case Intrinsic::vp_fcmp:
2475 case Intrinsic::smin:
2476 case Intrinsic::vp_smin:
2477 case Intrinsic::umin:
2478 case Intrinsic::vp_umin:
2479 case Intrinsic::smax:
2480 case Intrinsic::vp_smax:
2481 case Intrinsic::umax:
2482 case Intrinsic::vp_umax:
2483 case Intrinsic::sadd_sat:
2484 case Intrinsic::vp_sadd_sat:
2485 case Intrinsic::uadd_sat:
2486 case Intrinsic::vp_uadd_sat:
2487 // These intrinsics have 'vr' versions.
2488 case Intrinsic::vp_sub:
2489 case Intrinsic::vp_fsub:
2490 case Intrinsic::vp_fdiv:
2491 return Operand == 0 || Operand == 1;
2492 default:
2493 return false;
2494 }
2495}
2496
2497/// Check if sinking \p I's operands to I's basic block is profitable, because
2498/// the operands can be folded into a target instruction, e.g.
2499/// splats of scalars can fold into vector instructions.
2501 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2502 using namespace llvm::PatternMatch;
2503
2504 if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
2505 return false;
2506
2507 // Don't sink splat operands if the target prefers it. Some targets require
2508 // S2V transfer buffers and we can run out of them copying the same value
2509 // repeatedly.
2510 // FIXME: It could still be worth doing if it would improve vector register
2511 // pressure and prevent a vector spill.
2512 if (!ST->sinkSplatOperands())
2513 return false;
2514
2515 for (auto OpIdx : enumerate(I->operands())) {
2516 if (!canSplatOperand(I, OpIdx.index()))
2517 continue;
2518
2519 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2520 // Make sure we are not already sinking this operand
2521 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2522 continue;
2523
2524 // We are looking for a splat that can be sunk.
2526 m_Undef(), m_ZeroMask())))
2527 continue;
2528
2529 // Don't sink i1 splats.
2530 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2531 continue;
2532
2533 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2534 // and vector registers
2535 for (Use &U : Op->uses()) {
2536 Instruction *Insn = cast<Instruction>(U.getUser());
2537 if (!canSplatOperand(Insn, U.getOperandNo()))
2538 return false;
2539 }
2540
2541 Ops.push_back(&Op->getOperandUse(0));
2542 Ops.push_back(&OpIdx.value());
2543 }
2544 return true;
2545}
2546
2548RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
2550 // TODO: Enable expansion when unaligned access is not supported after we fix
2551 // issues in ExpandMemcmp.
2552 if (!(ST->enableUnalignedScalarMem() &&
2553 (ST->hasStdExtZbb() || ST->hasStdExtZbkb() || IsZeroCmp)))
2554 return Options;
2555
2556 Options.AllowOverlappingLoads = true;
2557 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
2558 Options.NumLoadsPerBlock = Options.MaxNumLoads;
2559 if (ST->is64Bit())
2560 Options.LoadSizes = {8, 4, 2, 1};
2561 else
2562 Options.LoadSizes = {4, 2, 1};
2563 return Options;
2564}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
Hexagon Common GEP
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static bool isRepeatedConcatMask(ArrayRef< int > Mask, int &SubVectorSize)
static unsigned isM1OrSmaller(MVT VT)
static cl::opt< unsigned > SLPMaxVF("riscv-v-slp-max-vf", cl::desc("Overrides result used for getMaximumVF query which is used " "exclusively by SLP vectorizer."), cl::Hidden)
static cl::opt< unsigned > RVVRegisterWidthLMUL("riscv-v-register-bit-width-lmul", cl::desc("The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), cl::init(2), cl::Hidden)
static InstructionCost getIntImmCostImpl(const DataLayout &DL, const RISCVSubtarget *ST, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, bool FreeZeroes)
static VectorType * getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, LLVMContext &C)
static const CostTblEntry VectorIntrinsicCostTable[]
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm)
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID)
This file defines a TargetTransformInfo::Concept conforming object specific to the RISC-V target mach...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file contains some templates that are useful if you are working with the STL at all.
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:623
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
std::optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:801
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:800
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:479
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:695
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:923
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:807
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:959
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Definition: BasicTTIImpl.h:380
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:690
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:675
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
bool isFPPredicate() const
Definition: InstrTypes.h:780
bool isIntPredicate() const
Definition: InstrTypes.h:781
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:434
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:843
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:617
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:421
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
bool noNaNs() const
Definition: FMF.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:563
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
Definition: DerivedTypes.h:598
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
static InstructionCost getInvalid(CostType Val=0)
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
const SmallVectorImpl< Type * > & getArgTypes() const
const SmallVectorImpl< const Value * > & getArgs() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
The optimization diagnostic interface.
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsF64() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
unsigned getRealMaxVLen() const
bool hasVInstructionsF32() const
unsigned getELen() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
std::optional< unsigned > getMaxVScale() const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool shouldExpandReduction(const IntrinsicInst *II) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment)
bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getRegUsageForType(Type *Ty)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
MVT getContainerForFixedLengthVector(MVT VT) const
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVII::VLMUL getLMUL(MVT VT)
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
The main scalar evolution driver.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
const DataLayout & getDataLayout() const
bool isLoweredToCall(const Function *F) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ None
The cast is not used with a load/store of any kind.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:228
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
LLVM Value Representation.
Definition: Value.h:74
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:665
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
Definition: DerivedTypes.h:460
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:232
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:218
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:353
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
Definition: CostTable.h:35
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
Definition: LoopInfo.cpp:1097
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition: STLExtras.h:2448
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:394
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1952
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
DWARFExpression::Operation Op
InstructionCost Cost
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition: STLExtras.h:2067
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Cost Table Entry.
Definition: CostTable.h:25
Extended Value Type.
Definition: ValueTypes.h:35
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).