Bug Summary

File: llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
Warning: line 223, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
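For context, the diagnostic is about left-shifting a 32-bit 'unsigned int' by an amount that can reach 32: in C++ a shift count greater than or equal to the width of the (promoted) left operand is undefined behaviour, so the analyzer reports any path on which the count can become 32. The snippet below is a minimal standalone sketch of that pattern and of a guarded variant, written for this report only; the names shiftMask and shiftMaskChecked are invented here and do not appear in the LLVM sources.

#include <cstdio>

// Minimal sketch of the class of bug being reported (not the LLVM code).
// Left-shifting a 32-bit unsigned value by 32 or more is undefined behaviour.
unsigned shiftMask(unsigned Shift) {
  return ~255U << Shift;                       // UB when Shift >= 32
}

// A guarded variant that keeps the shift count strictly below the width.
unsigned shiftMaskChecked(unsigned Shift) {
  return Shift < 32 ? (~255U << Shift) : 0U;
}

int main() {
  std::printf("0x%X\n", shiftMaskChecked(8));  // prints 0xFFFF0000
  std::printf("0x%X\n", shiftMaskChecked(32)); // guarded path, prints 0x0
  return 0;
}

Either guarding the count as above or widening the operand to 64 bits before shifting avoids the undefined case.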

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARMTargetTransformInfo.h"
10#include "ARMSubtarget.h"
11#include "MCTargetDesc/ARMAddressingModes.h"
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/LoopInfo.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/ISDOpcodes.h"
17#include "llvm/CodeGen/ValueTypes.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/CallSite.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/DerivedTypes.h"
22#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/IntrinsicInst.h"
25#include "llvm/IR/PatternMatch.h"
26#include "llvm/IR/Type.h"
27#include "llvm/MC/SubtargetFeature.h"
28#include "llvm/Support/Casting.h"
29#include "llvm/Support/MachineValueType.h"
30#include "llvm/Target/TargetMachine.h"
31#include <algorithm>
32#include <cassert>
33#include <cstdint>
34#include <utility>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "armtti"
39
40static cl::opt<bool> EnableMaskedLoadStores(
41 "enable-arm-maskedldst", cl::Hidden, cl::init(true),
42 cl::desc("Enable the generation of masked loads and stores"));
43
44static cl::opt<bool> DisableLowOverheadLoops(
45 "disable-arm-loloops", cl::Hidden, cl::init(false),
46 cl::desc("Disable the generation of low-overhead loops"));
47
48extern cl::opt<bool> DisableTailPredication;
49
50extern cl::opt<bool> EnableMaskedGatherScatters;
51
52bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
53 const Function *Callee) const {
54 const TargetMachine &TM = getTLI()->getTargetMachine();
55 const FeatureBitset &CallerBits =
56 TM.getSubtargetImpl(*Caller)->getFeatureBits();
57 const FeatureBitset &CalleeBits =
58 TM.getSubtargetImpl(*Callee)->getFeatureBits();
59
60 // To inline a callee, all features not in the whitelist must match exactly.
61 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
62 (CalleeBits & ~InlineFeatureWhitelist);
63 // For features in the whitelist, the callee's features must be a subset of
64 // the callers'.
65 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
66 (CalleeBits & InlineFeatureWhitelist);
67 return MatchExact && MatchSubset;
68}
69
70int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
71 assert(Ty->isIntegerTy());
Step 9: '?' condition is true
72
73 unsigned Bits = Ty->getPrimitiveSizeInBits();
74 if (Bits == 0 || Imm.getActiveBits() >= 64)
Step 10: Assuming 'Bits' is not equal to 0
Step 11: Assuming the condition is false
Step 12: Taking false branch
75 return 4;
76
77 int64_t SImmVal = Imm.getSExtValue();
78 uint64_t ZImmVal = Imm.getZExtValue();
79 if (!ST->isThumb()) {
Step 13: Taking false branch
80 if ((SImmVal >= 0 && SImmVal < 65536) ||
81 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
82 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
83 return 1;
84 return ST->hasV6T2Ops() ? 2 : 3;
85 }
86 if (ST->isThumb2()) {
Step 14: Taking false branch
87 if ((SImmVal >= 0 && SImmVal < 65536) ||
88 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
89 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
90 return 1;
91 return ST->hasV6T2Ops() ? 2 : 3;
92 }
93 // Thumb1, any i8 imm cost 1.
94 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
Step 15: Assuming 'Bits' is not equal to 8
Step 16: Assuming 'SImmVal' is < 0
95 return 1;
96 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
Step 17: Assuming the condition is false
Step 18: Calling 'isThumbImmShiftedVal'
97 return 2;
98 // Load from constantpool.
99 return 3;
100}
101
102// Constants smaller than 256 fit in the immediate field of
103// Thumb1 instructions so we return a zero cost and 1 otherwise.
104int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
105 const APInt &Imm, Type *Ty) {
106 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
107 return 0;
108
109 return 1;
110}
111
112int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
113 Type *Ty) {
114 // Division by a constant can be turned into multiplication, but only if we
115 // know it's constant. So it's not so much that the immediate is cheap (it's
116 // not), but that the alternative is worse.
117 // FIXME: this is probably unneeded with GlobalISel.
118 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
Step 1: Assuming 'Opcode' is not equal to SDiv
Step 2: Assuming 'Opcode' is not equal to UDiv
119 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
Step 3: Assuming 'Opcode' is not equal to SRem
Step 4: Assuming 'Opcode' is not equal to URem
120 Idx == 1)
121 return 0;
122
123 if (Opcode == Instruction::And) {
Step 5: Assuming 'Opcode' is equal to And
Step 6: Taking true branch
124 // UXTB/UXTH
125 if (Imm == 255 || Imm == 65535)
Step 7: Taking false branch
126 return 0;
127 // Conversion to BIC is free, and means we can use ~Imm instead.
128 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
Step 8: Calling 'ARMTTIImpl::getIntImmCost'
129 }
130
131 if (Opcode == Instruction::Add)
132 // Conversion to SUB is free, and means we can use -Imm instead.
133 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
134
135 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
136 Ty->getIntegerBitWidth() == 32) {
137 int64_t NegImm = -Imm.getSExtValue();
138 if (ST->isThumb2() && NegImm < 1<<12)
139 // icmp X, #-C -> cmn X, #C
140 return 0;
141 if (ST->isThumb() && NegImm < 1<<8)
142 // icmp X, #-C -> adds X, #C
143 return 0;
144 }
145
146 // xor a, -1 can always be folded to MVN
147 if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
148 return 0;
149
150 return getIntImmCost(Imm, Ty);
151}
152
153int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
154 const Instruction *I) {
155 int ISD = TLI->InstructionOpcodeToISD(Opcode);
156 assert(ISD && "Invalid opcode");
157
158 // Single to/from double precision conversions.
159 static const CostTblEntry NEONFltDblTbl[] = {
160 // Vector fptrunc/fpext conversions.
161 { ISD::FP_ROUND, MVT::v2f64, 2 },
162 { ISD::FP_EXTEND, MVT::v2f32, 2 },
163 { ISD::FP_EXTEND, MVT::v4f32, 4 }
164 };
165
166 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
167 ISD == ISD::FP_EXTEND)) {
168 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
169 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
170 return LT.first * Entry->Cost;
171 }
172
173 EVT SrcTy = TLI->getValueType(DL, Src);
174 EVT DstTy = TLI->getValueType(DL, Dst);
175
176 if (!SrcTy.isSimple() || !DstTy.isSimple())
177 return BaseT::getCastInstrCost(Opcode, Dst, Src);
178
179 // The extend of a load is free
180 if (I && isa<LoadInst>(I->getOperand(0))) {
181 static const TypeConversionCostTblEntry LoadConversionTbl[] = {
182 {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
183 {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
184 {ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},
185 {ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},
186 {ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},
187 {ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},
188 {ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},
189 {ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1},
190 {ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1},
191 {ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1},
192 {ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1},
193 {ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1},
194 };
195 if (const auto *Entry = ConvertCostTableLookup(
196 LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
197 return Entry->Cost;
198
199 static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
200 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0},
201 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0},
202 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0},
203 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0},
204 {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0},
205 {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0},
206 };
207 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
208 if (const auto *Entry =
209 ConvertCostTableLookup(MVELoadConversionTbl, ISD,
210 DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
211 return Entry->Cost;
212 }
213 }
214
215 // Some arithmetic, load and store operations have specific instructions
216 // to cast up/down their types automatically at no extra cost.
217 // TODO: Get these tables to know at least what the related operations are.
218 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
219 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
220 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
221 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
222 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
223 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
224 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
225
226 // The number of vmovl instructions for the extension.
227 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
228 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
229 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
230 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
231 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
232 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
233 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
234 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
235 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
236 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
237
238 // Operations that we legalize using splitting.
239 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
240 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
241
242 // Vector float <-> i32 conversions.
243 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
244 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
245
246 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
247 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
248 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
249 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
250 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
251 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
252 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
253 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
254 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
255 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
256 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
257 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
258 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
259 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
260 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
261 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
262 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
263 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
264 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
265 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
266
267 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
268 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
269 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
270 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
271 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
272 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
273
274 // Vector double <-> i32 conversions.
275 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
276 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
277
278 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
279 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
280 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
281 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
282 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
283 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
284
285 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
286 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
287 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
288 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
289 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
290 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
291 };
292
293 if (SrcTy.isVector() && ST->hasNEON()) {
294 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
295 DstTy.getSimpleVT(),
296 SrcTy.getSimpleVT()))
297 return Entry->Cost;
298 }
299
300 // Scalar float to integer conversions.
301 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
302 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
303 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
304 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
305 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
306 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
307 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
308 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
309 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
310 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
311 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
312 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
313 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
314 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
315 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
316 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
317 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
318 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
319 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
320 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
321 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
322 };
323 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
324 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
325 DstTy.getSimpleVT(),
326 SrcTy.getSimpleVT()))
327 return Entry->Cost;
328 }
329
330 // Scalar integer to float conversions.
331 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
332 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
333 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
334 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
335 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
336 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
337 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
338 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
339 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
340 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
341 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
342 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
343 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
344 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
345 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
346 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
347 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
348 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
349 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
350 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
351 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
352 };
353
354 if (SrcTy.isInteger() && ST->hasNEON()) {
355 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
356 ISD, DstTy.getSimpleVT(),
357 SrcTy.getSimpleVT()))
358 return Entry->Cost;
359 }
360
361 // MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one
362 // instruction, i8->i32 is two. i64 zexts are an VAND with a constant, sext
363 // are linearised so take more.
364 static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = {
365 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
366 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
367 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
368 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
369 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 },
370 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 },
371 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
372 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
373 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 },
374 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
375 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 },
376 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 },
377 };
378
379 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
380 if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
381 ISD, DstTy.getSimpleVT(),
382 SrcTy.getSimpleVT()))
383 return Entry->Cost * ST->getMVEVectorCostFactor();
384 }
385
386 // Scalar integer conversion costs.
387 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
388 // i16 -> i64 requires two dependent operations.
389 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
390
391 // Truncates on i64 are assumed to be free.
392 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
393 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
394 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
395 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
396 };
397
398 if (SrcTy.isInteger()) {
399 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
400 DstTy.getSimpleVT(),
401 SrcTy.getSimpleVT()))
402 return Entry->Cost;
403 }
404
405 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
406 ? ST->getMVEVectorCostFactor()
407 : 1;
408 return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src);
409}
410
411int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
412 unsigned Index) {
413 // Penalize inserting into a D-subregister. We end up with a three times
414 // lower estimated throughput on swift.
415 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
416 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
417 return 3;
418
419 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
420 Opcode == Instruction::ExtractElement)) {
421 // Cross-class copies are expensive on many microarchitectures,
422 // so assume they are expensive by default.
423 if (ValTy->getVectorElementType()->isIntegerTy())
424 return 3;
425
426 // Even if it's not a cross class copy, this likely leads to mixing
427 // of NEON and VFP code and should be therefore penalized.
428 if (ValTy->isVectorTy() &&
429 ValTy->getScalarSizeInBits() <= 32)
430 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
431 }
432
433 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
434 Opcode == Instruction::ExtractElement)) {
435 // We say MVE moves costs at least the MVEVectorCostFactor, even though
436 // they are scalar instructions. This helps prevent mixing scalar and
437 // vector, to prevent vectorising where we end up just scalarising the
438 // result anyway.
439 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index),
440 ST->getMVEVectorCostFactor()) *
441 ValTy->getVectorNumElements() / 2;
442 }
443
444 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
445}
446
447int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
448 const Instruction *I) {
449 int ISD = TLI->InstructionOpcodeToISD(Opcode);
450 // On NEON a vector select gets lowered to vbsl.
451 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
452 // Lowering of some vector selects is currently far from perfect.
453 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
454 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
455 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
456 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
457 };
458
459 EVT SelCondTy = TLI->getValueType(DL, CondTy);
460 EVT SelValTy = TLI->getValueType(DL, ValTy);
461 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
462 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
463 SelCondTy.getSimpleVT(),
464 SelValTy.getSimpleVT()))
465 return Entry->Cost;
466 }
467
468 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
469 return LT.first;
470 }
471
472 int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
473 ? ST->getMVEVectorCostFactor()
474 : 1;
475 return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
476}
477
478int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
479 const SCEV *Ptr) {
480 // Address computations in vectorized code with non-consecutive addresses will
481 // likely result in more instructions compared to scalar code where the
482 // computation can more often be merged into the index mode. The resulting
483 // extra micro-ops can significantly decrease throughput.
484 unsigned NumVectorInstToHideOverhead = 10;
485 int MaxMergeDistance = 64;
486
487 if (ST->hasNEON()) {
488 if (Ty->isVectorTy() && SE &&
489 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
490 return NumVectorInstToHideOverhead;
491
492 // In many cases the address computation is not merged into the instruction
493 // addressing mode.
494 return 1;
495 }
496 return BaseT::getAddressComputationCost(Ty, SE, Ptr);
497}
498
499bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
500 if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
501 return false;
502
503 if (auto *VecTy = dyn_cast<VectorType>(DataTy)) {
504 // Don't support v2i1 yet.
505 if (VecTy->getNumElements() == 2)
506 return false;
507
508 // We don't support extending fp types.
509 unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
510 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
511 return false;
512 }
513
514 unsigned EltWidth = DataTy->getScalarSizeInBits();
515 return (EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
516 (EltWidth == 16 && (!Alignment || Alignment >= 2)) ||
517 (EltWidth == 8);
518}
519
520bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, MaybeAlign Alignment) {
521 if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps())
522 return false;
523
524 // This method is called in 2 places:
525 // - from the vectorizer with a scalar type, in which case we need to get
526 // this as good as we can with the limited info we have (and rely on the cost
527 // model for the rest).
528 // - from the masked intrinsic lowering pass with the actual vector type.
529 // For MVE, we have a custom lowering pass that will already have custom
530 // legalised any gathers that we can to MVE intrinsics, and want to expand all
531 // the rest. The pass runs before the masked intrinsic lowering pass, so if we
532 // are here, we know we want to expand.
533 if (isa<VectorType>(Ty))
534 return false;
535
536 unsigned EltWidth = Ty->getScalarSizeInBits();
537 return ((EltWidth == 32 && (!Alignment || Alignment >= 4)) ||
538 (EltWidth == 16 && (!Alignment || Alignment >= 2)) || EltWidth == 8);
539}
540
541int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
542 const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
543 assert(MI && "MemcpyInst expected");
544 ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
545
546 // To model the cost of a library call, we assume 1 for the call, and
547 // 3 for the argument setup.
548 const unsigned LibCallCost = 4;
549
550 // If 'size' is not a constant, a library call will be generated.
551 if (!C)
552 return LibCallCost;
553
554 const unsigned Size = C->getValue().getZExtValue();
555 const unsigned DstAlign = MI->getDestAlignment();
556 const unsigned SrcAlign = MI->getSourceAlignment();
557 const Function *F = I->getParent()->getParent();
558 const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
559 std::vector<EVT> MemOps;
560
561 // MemOps will be populated with a list of data types that need to be
562 // loaded and stored. That's why we multiply the number of elements by 2 to
563 // get the cost for this memcpy.
564 if (getTLI()->findOptimalMemOpLowering(
565 MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
566 false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
567 MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
568 F->getAttributes()))
569 return MemOps.size() * 2;
570
571 // If we can't find an optimal memop lowering, return the default cost
572 return LibCallCost;
573}
574
575int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
576 Type *SubTp) {
577 if (ST->hasNEON()) {
578 if (Kind == TTI::SK_Broadcast) {
579 static const CostTblEntry NEONDupTbl[] = {
580 // VDUP handles these cases.
581 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
582 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
583 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
584 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
585 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
586 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
587
588 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
589 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
590 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
591 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
592
593 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
594
595 if (const auto *Entry =
596 CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
597 return LT.first * Entry->Cost;
598 }
599 if (Kind == TTI::SK_Reverse) {
600 static const CostTblEntry NEONShuffleTbl[] = {
601 // Reverse shuffle cost one instruction if we are shuffling within a
602 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
603 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
604 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
605 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
606 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
607 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
608 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
609
610 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
611 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
612 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
613 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
614
615 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
616
617 if (const auto *Entry =
618 CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
619 return LT.first * Entry->Cost;
620 }
621 if (Kind == TTI::SK_Select) {
622 static const CostTblEntry NEONSelShuffleTbl[] = {
623 // Select shuffle cost table for ARM. Cost is the number of
624 // instructions
625 // required to create the shuffled vector.
626
627 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
628 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
629 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
630 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
631
632 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
633 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
634 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
635
636 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
637
638 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
639
640 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
641 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
642 ISD::VECTOR_SHUFFLE, LT.second))
643 return LT.first * Entry->Cost;
644 }
645 }
646 if (ST->hasMVEIntegerOps()) {
647 if (Kind == TTI::SK_Broadcast) {
648 static const CostTblEntry MVEDupTbl[] = {
649 // VDUP handles these cases.
650 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
651 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
652 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},
653 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
654 {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
655
656 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
657
658 if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
659 LT.second))
660 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor();
661 }
662 }
663 int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
664 ? ST->getMVEVectorCostFactor()
665 : 1;
666 return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
667}
668
669int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
670 TTI::OperandValueKind Op1Info,
671 TTI::OperandValueKind Op2Info,
672 TTI::OperandValueProperties Opd1PropInfo,
673 TTI::OperandValueProperties Opd2PropInfo,
674 ArrayRef<const Value *> Args,
675 const Instruction *CxtI) {
676 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
677 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
678
679 if (ST->hasNEON()) {
680 const unsigned FunctionCallDivCost = 20;
681 const unsigned ReciprocalDivCost = 10;
682 static const CostTblEntry CostTbl[] = {
683 // Division.
684 // These costs are somewhat random. Choose a cost of 20 to indicate that
685 // vectorizing division (added function call) is going to be very expensive.
686 // Double registers types.
687 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
688 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
689 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
690 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
691 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
692 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
693 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
694 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
695 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
696 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
697 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
698 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
699 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
700 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
701 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
702 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
703 // Quad register types.
704 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
705 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
706 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
707 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
708 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
709 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
710 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
711 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
712 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
713 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
714 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
715 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
716 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
717 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
718 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
719 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
720 // Multiplication.
721 };
722
723 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
724 return LT.first * Entry->Cost;
725
726 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
727 Opd1PropInfo, Opd2PropInfo);
728
729 // This is somewhat of a hack. The problem that we are facing is that SROA
730 // creates a sequence of shift, and, or instructions to construct values.
731 // These sequences are recognized by the ISel and have zero-cost. Not so for
732 // the vectorized code. Because we have support for v2i64 but not i64 those
733 // sequences look particularly beneficial to vectorize.
734 // To work around this we increase the cost of v2i64 operations to make them
735 // seem less beneficial.
736 if (LT.second == MVT::v2i64 &&
737 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
738 Cost += 4;
739
740 return Cost;
741 }
742
743 // If this operation is a shift on arm/thumb2, it might well be folded into
744 // the following instruction, hence having a cost of 0.
745 auto LooksLikeAFreeShift = [&]() {
746 if (ST->isThumb1Only() || Ty->isVectorTy())
747 return false;
748
749 if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
750 return false;
751 if (Op2Info != TargetTransformInfo::OK_UniformConstantValue)
752 return false;
753
754 // Folded into a ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB
755 switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
756 case Instruction::Add:
757 case Instruction::Sub:
758 case Instruction::And:
759 case Instruction::Xor:
760 case Instruction::Or:
761 case Instruction::ICmp:
762 return true;
763 default:
764 return false;
765 }
766 };
767 if (LooksLikeAFreeShift())
768 return 0;
769
770 int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
771 ? ST->getMVEVectorCostFactor()
772 : 1;
773
774 // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
775 // without treating floats as more expensive than scalars or increasing the
776 // costs for custom operations. The result is also multiplied by the
777 // MVEVectorCostFactor where appropriate.
778 if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
779 return LT.first * BaseCost;
780
781 // Else this is expand, assume that we need to scalarize this op.
782 if (Ty->isVectorTy()) {
783 unsigned Num = Ty->getVectorNumElements();
784 unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
785 // Return the cost of multiple scalar invocation plus the cost of
786 // inserting and extracting the values.
787 return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost;
788 }
789
790 return BaseCost;
791}
792
793int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
794 MaybeAlign Alignment, unsigned AddressSpace,
795 const Instruction *I) {
796 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
797
798 if (ST->hasNEON() && Src->isVectorTy() &&
799 (Alignment && *Alignment != Align(16)) &&
800 Src->getVectorElementType()->isDoubleTy()) {
801 // Unaligned loads/stores are extremely inefficient.
802 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
803 return LT.first * 4;
804 }
805 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
806 ? ST->getMVEVectorCostFactor()
807 : 1;
808 return BaseCost * LT.first;
809}
810
811int ARMTTIImpl::getInterleavedMemoryOpCost(
812 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
813 unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
814 bool UseMaskForGaps) {
815 assert(Factor >= 2 && "Invalid interleave factor");
816 assert(isa<VectorType>(VecTy) && "Expect a vector type");
817
818 // vldN/vstN doesn't support vector types of i64/f64 element.
819 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
820
821 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
822 !UseMaskForCond && !UseMaskForGaps) {
823 unsigned NumElts = VecTy->getVectorNumElements();
824 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
825
826 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
827 // Accesses having vector types that are a multiple of 128 bits can be
828 // matched to more than one vldN/vstN instruction.
829 int BaseCost = ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor() : 1;
830 if (NumElts % Factor == 0 &&
831 TLI->isLegalInterleavedAccessType(Factor, SubVecTy, DL))
832 return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
833
834 // Some smaller than legal interleaved patterns are cheap as we can make
835 // use of the vmovn or vrev patterns to interleave a standard load. This is
836 // true for v4i8, v8i8 and v4i16 at least (but not for v4f16 as it is
837 // promoted differently). The cost of 2 here is then a load and vrev or
838 // vmovn.
839 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
840 VecTy->isIntOrIntVectorTy() && DL.getTypeSizeInBits(SubVecTy) <= 64)
841 return 2 * BaseCost;
842 }
843
844 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
845 Alignment, AddressSpace,
846 UseMaskForCond, UseMaskForGaps);
847}
848
849bool ARMTTIImpl::isLoweredToCall(const Function *F) {
850 if (!F->isIntrinsic())
851 BaseT::isLoweredToCall(F);
852
853 // Assume all Arm-specific intrinsics map to an instruction.
854 if (F->getName().startswith("llvm.arm"))
855 return false;
856
857 switch (F->getIntrinsicID()) {
858 default: break;
859 case Intrinsic::powi:
860 case Intrinsic::sin:
861 case Intrinsic::cos:
862 case Intrinsic::pow:
863 case Intrinsic::log:
864 case Intrinsic::log10:
865 case Intrinsic::log2:
866 case Intrinsic::exp:
867 case Intrinsic::exp2:
868 return true;
869 case Intrinsic::sqrt:
870 case Intrinsic::fabs:
871 case Intrinsic::copysign:
872 case Intrinsic::floor:
873 case Intrinsic::ceil:
874 case Intrinsic::trunc:
875 case Intrinsic::rint:
876 case Intrinsic::nearbyint:
877 case Intrinsic::round:
878 case Intrinsic::canonicalize:
879 case Intrinsic::lround:
880 case Intrinsic::llround:
881 case Intrinsic::lrint:
882 case Intrinsic::llrint:
883 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
884 return true;
885 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
886 return true;
887 // Some operations can be handled by vector instructions and assume
888 // unsupported vectors will be expanded into supported scalar ones.
889 // TODO Handle scalar operations properly.
890 return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
891 case Intrinsic::masked_store:
892 case Intrinsic::masked_load:
893 case Intrinsic::masked_gather:
894 case Intrinsic::masked_scatter:
895 return !ST->hasMVEIntegerOps();
896 case Intrinsic::sadd_with_overflow:
897 case Intrinsic::uadd_with_overflow:
898 case Intrinsic::ssub_with_overflow:
899 case Intrinsic::usub_with_overflow:
900 case Intrinsic::sadd_sat:
901 case Intrinsic::uadd_sat:
902 case Intrinsic::ssub_sat:
903 case Intrinsic::usub_sat:
904 return false;
905 }
906
907 return BaseT::isLoweredToCall(F);
908}
909
910bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
911 AssumptionCache &AC,
912 TargetLibraryInfo *LibInfo,
913 HardwareLoopInfo &HWLoopInfo) {
914 // Low-overhead branches are only supported in the 'low-overhead branch'
915 // extension of v8.1-m.
916 if (!ST->hasLOB() || DisableLowOverheadLoops)
917 return false;
918
919 if (!SE.hasLoopInvariantBackedgeTakenCount(L))
920 return false;
921
922 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
923 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
924 return false;
925
926 const SCEV *TripCountSCEV =
927 SE.getAddExpr(BackedgeTakenCount,
928 SE.getOne(BackedgeTakenCount->getType()));
929
930 // We need to store the trip count in LR, a 32-bit register.
931 if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32)
932 return false;
933
934 // Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
935 // point in generating a hardware loop if that's going to happen.
936 auto MaybeCall = [this](Instruction &I) {
937 const ARMTargetLowering *TLI = getTLI();
938 unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
939 EVT VT = TLI->getValueType(DL, I.getType(), true);
940 if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
941 return true;
942
943 // Check if an intrinsic will be lowered to a call and assume that any
944 // other CallInst will generate a bl.
945 if (auto *Call = dyn_cast<CallInst>(&I)) {
946 if (isa<IntrinsicInst>(Call)) {
947 if (const Function *F = Call->getCalledFunction())
948 return isLoweredToCall(F);
949 }
950 return true;
951 }
952
953 // FPv5 provides conversions between integer, double-precision,
954 // single-precision, and half-precision formats.
955 switch (I.getOpcode()) {
956 default:
957 break;
958 case Instruction::FPToSI:
959 case Instruction::FPToUI:
960 case Instruction::SIToFP:
961 case Instruction::UIToFP:
962 case Instruction::FPTrunc:
963 case Instruction::FPExt:
964 return !ST->hasFPARMv8Base();
965 }
966
967 // FIXME: Unfortunately the approach of checking the Operation Action does
968 // not catch all cases of Legalization that use library calls. Our
969 // Legalization step categorizes some transformations into library calls as
970 // Custom, Expand or even Legal when doing type legalization. So for now
971 // we have to special case for instance the SDIV of 64bit integers and the
972 // use of floating point emulation.
973 if (VT.isInteger() && VT.getSizeInBits() >= 64) {
974 switch (ISD) {
975 default:
976 break;
977 case ISD::SDIV:
978 case ISD::UDIV:
979 case ISD::SREM:
980 case ISD::UREM:
981 case ISD::SDIVREM:
982 case ISD::UDIVREM:
983 return true;
984 }
985 }
986
987 // Assume all other non-float operations are supported.
988 if (!VT.isFloatingPoint())
989 return false;
990
991 // We'll need a library call to handle most floats when using soft.
992 if (TLI->useSoftFloat()) {
993 switch (I.getOpcode()) {
994 default:
995 return true;
996 case Instruction::Alloca:
997 case Instruction::Load:
998 case Instruction::Store:
999 case Instruction::Select:
1000 case Instruction::PHI:
1001 return false;
1002 }
1003 }
1004
1005 // We'll need a libcall to perform double precision operations on a single
1006 // precision only FPU.
1007 if (I.getType()->isDoubleTy() && !ST->hasFP64())
1008 return true;
1009
1010 // Likewise for half precision arithmetic.
1011 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
1012 return true;
1013
1014 return false;
1015 };
1016
1017 auto IsHardwareLoopIntrinsic = [](Instruction &I) {
1018 if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
1019 switch (Call->getIntrinsicID()) {
1020 default:
1021 break;
1022 case Intrinsic::set_loop_iterations:
1023 case Intrinsic::test_set_loop_iterations:
1024 case Intrinsic::loop_decrement:
1025 case Intrinsic::loop_decrement_reg:
1026 return true;
1027 }
1028 }
1029 return false;
1030 };
1031
1032 // Scan the instructions to see if there's any that we know will turn into a
1033 // call or if this loop is already a low-overhead loop.
1034 auto ScanLoop = [&](Loop *L) {
1035 for (auto *BB : L->getBlocks()) {
1036 for (auto &I : *BB) {
1037 if (MaybeCall(I) || IsHardwareLoopIntrinsic(I))
1038 return false;
1039 }
1040 }
1041 return true;
1042 };
1043
1044 // Visit inner loops.
1045 for (auto Inner : *L)
1046 if (!ScanLoop(Inner))
1047 return false;
1048
1049 if (!ScanLoop(L))
1050 return false;
1051
1052 // TODO: Check whether the trip count calculation is expensive. If L is the
1053 // inner loop but we know it has a low trip count, calculating that trip
1054 // count (in the parent loop) may be detrimental.
1055
1056 LLVMContext &C = L->getHeader()->getContext();
1057 HWLoopInfo.CounterInReg = true;
1058 HWLoopInfo.IsNestingLegal = false;
1059 HWLoopInfo.PerformEntryTest = true;
1060 HWLoopInfo.CountType = Type::getInt32Ty(C);
1061 HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
1062 return true;
1063}
1064
1065static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
1066 // We don't allow icmp's, and because we only look at single block loops,
1067 // we simply count the icmps, i.e. there should only be 1 for the backedge.
1068 if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
1069 return false;
1070
1071 if (isa<FCmpInst>(&I))
1072 return false;
1073
1074 // We could allow extending/narrowing FP loads/stores, but codegen is
1075 // too inefficient so reject this for now.
1076 if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
1077 return false;
1078
1079 // Extends have to be extending-loads
1080 if (isa<SExtInst>(&I) || isa<ZExtInst>(&I) )
1081 if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
1082 return false;
1083
1084 // Truncs have to be narrowing-stores
1085 if (isa<TruncInst>(&I) )
1086 if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
1087 return false;
1088
1089 return true;
1090}
1091
1092// To set up a tail-predicated loop, we need to know the total number of
1093// elements processed by that loop. Thus, we need to determine the element
1094// size and:
1095// 1) it should be uniform for all operations in the vector loop, so we
1096// e.g. don't want any widening/narrowing operations.
1097// 2) it should be smaller than i64s because we don't have vector operations
1098// that work on i64s.
1099// 3) we don't want elements to be reversed or shuffled, to make sure the
1100// tail-predication masks/predicates the right lanes.
1101//
1102static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1103 const DataLayout &DL,
1104 const LoopAccessInfo *LAI) {
1105 PredicatedScalarEvolution PSE = LAI->getPSE();
1106 int ICmpCount = 0;
1107 int Stride = 0;
1108
1109 LLVM_DEBUG(dbgs() << "tail-predication: checking allowed instructions\n");
1110 SmallVector<Instruction *, 16> LoadStores;
1111 for (BasicBlock *BB : L->blocks()) {
1112 for (Instruction &I : BB->instructionsWithoutDebug()) {
1113 if (isa<PHINode>(&I))
1114 continue;
1115 if (!canTailPredicateInstruction(I, ICmpCount)) {
1116 LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
1117 return false;
1118 }
1119
1120 Type *T = I.getType();
1121 if (T->isPointerTy())
1122 T = T->getPointerElementType();
1123
1124 if (T->getScalarSizeInBits() > 32) {
1125 LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
1126 return false;
1127 }
1128
1129 if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
1130 Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
1131 int64_t NextStride = getPtrStride(PSE, Ptr, L);
1132 // TODO: for now only allow consecutive strides of 1. We could support
1133 // other strides as long as it is uniform, but let's keep it simple for
1134 // now.
1135 if (Stride == 0 && NextStride == 1) {
1136 Stride = NextStride;
1137 continue;
1138 }
1139 if (Stride != NextStride) {
1140 LLVM_DEBUG(dbgs() << "Different strides found, can't "
1141 "tail-predicate\n.");
1142 return false;
1143 }
1144 }
1145 }
1146 }
1147
1148 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
1149 return true;
1150}
1151
1152bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
1153 ScalarEvolution &SE,
1154 AssumptionCache &AC,
1155 TargetLibraryInfo *TLI,
1156 DominatorTree *DT,
1157 const LoopAccessInfo *LAI) {
1158 if (DisableTailPredication)
1159 return false;
1160
1161 // Creating a predicated vector loop is the first step for generating a
1162 // tail-predicated hardware loop, for which we need the MVE masked
1163 // load/stores instructions:
1164 if (!ST->hasMVEIntegerOps())
1165 return false;
1166
1167 // For now, restrict this to single block loops.
1168 if (L->getNumBlocks() > 1) {
1169 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
1170 "loop.\n");
1171 return false;
1172 }
1173
1174 assert(L->empty() && "preferPredicateOverEpilogue: inner-loop expected");
1175
1176 HardwareLoopInfo HWLoopInfo(L);
1177 if (!HWLoopInfo.canAnalyze(*LI)) {
1178 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
1179 "analyzable.\n");
1180 return false;
1181 }
1182
1183 // This checks if we have the low-overhead branch architecture
1184 // extension, and if we will create a hardware-loop:
1185 if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
1186 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
1187 "profitable.\n");
1188 return false;
1189 }
1190
1191 if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
1192 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
1193 "a candidate.\n");
1194 return false;
1195 }
1196
1197 return canTailPredicateLoop(L, LI, SE, DL, LAI);
1198}
1199
1200
1201void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1202 TTI::UnrollingPreferences &UP) {
1203 // Only currently enable these preferences for M-Class cores.
1204 if (!ST->isMClass())
1205 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
1206
1207 // Disable loop unrolling for Oz and Os.
1208 UP.OptSizeThreshold = 0;
1209 UP.PartialOptSizeThreshold = 0;
1210 if (L->getHeader()->getParent()->hasOptSize())
1211 return;
1212
1213 // Only enable on Thumb-2 targets.
1214 if (!ST->isThumb2())
1215 return;
1216
1217 SmallVector<BasicBlock*, 4> ExitingBlocks;
1218 L->getExitingBlocks(ExitingBlocks);
1219 LLVM_DEBUG(dbgs() << "Loop has:\n"
1220 << "Blocks: " << L->getNumBlocks() << "\n"
1221 << "Exit blocks: " << ExitingBlocks.size() << "\n");
1222
1223 // Only allow another exit other than the latch. This acts as an early exit
1224 // as it mirrors the profitability calculation of the runtime unroller.
1225 if (ExitingBlocks.size() > 2)
1226 return;
1227
1228 // Limit the CFG of the loop body for targets with a branch predictor.
1229 // Allowing 4 blocks permits if-then-else diamonds in the body.
1230 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
1231 return;
1232
1233 // Scan the loop: don't unroll loops with calls as this could prevent
1234 // inlining.
1235 unsigned Cost = 0;
1236 for (auto *BB : L->getBlocks()) {
1237 for (auto &I : *BB) {
1238 // Don't unroll vectorised loop. MVE does not benefit from it as much as
1239 // scalar code.
1240 if (I.getType()->isVectorTy())
1241 return;
1242
1243 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1244 ImmutableCallSite CS(&I);
1245 if (const Function *F = CS.getCalledFunction()) {
1246 if (!isLoweredToCall(F))
1247 continue;
1248 }
1249 return;
1250 }
1251
1252 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
1253 I.value_op_end());
1254 Cost += getUserCost(&I, Operands);
1255 }
1256 }
1257
1258 LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
1259
1260 UP.Partial = true;
1261 UP.Runtime = true;
1262 UP.UpperBound = true;
1263 UP.UnrollRemainder = true;
1264 UP.DefaultUnrollRuntimeCount = 4;
1265 UP.UnrollAndJam = true;
1266 UP.UnrollAndJamInnerLoopThreshold = 60;
1267
1268 // Force unrolling small loops can be very useful because of the branch
1269 // taken cost of the backedge.
1270 if (Cost < 12)
1271 UP.Force = true;
1272}
1273
1274bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
1275 TTI::ReductionFlags Flags) const {
1276 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
1277 unsigned ScalarBits = Ty->getScalarSizeInBits();
1278 if (!ST->hasMVEIntegerOps())
1279 return false;
1280
1281 switch (Opcode) {
1282 case Instruction::FAdd:
1283 case Instruction::FMul:
1284 case Instruction::And:
1285 case Instruction::Or:
1286 case Instruction::Xor:
1287 case Instruction::Mul:
1288 case Instruction::FCmp:
1289 return false;
1290 case Instruction::ICmp:
1291 case Instruction::Add:
1292 return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128;
1293 default:
1294 llvm_unreachable("Unhandled reduction opcode")::llvm::llvm_unreachable_internal("Unhandled reduction opcode"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp"
, 1294)
;
1295 }
1296 return false;
1297}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM addressing mode implementation stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
14#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15
16#include "llvm/ADT/APFloat.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/bit.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22
23namespace llvm {
24
25/// ARM_AM - ARM Addressing Mode Stuff
26namespace ARM_AM {
27 enum ShiftOpc {
28 no_shift = 0,
29 asr,
30 lsl,
31 lsr,
32 ror,
33 rrx,
34 uxtw
35 };
36
37 enum AddrOpc {
38 sub = 0,
39 add
40 };
41
42 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
43
44 inline const char *getShiftOpcStr(ShiftOpc Op) {
45 switch (Op) {
46     default: llvm_unreachable("Unknown shift opc!");
47 case ARM_AM::asr: return "asr";
48 case ARM_AM::lsl: return "lsl";
49 case ARM_AM::lsr: return "lsr";
50 case ARM_AM::ror: return "ror";
51 case ARM_AM::rrx: return "rrx";
52 case ARM_AM::uxtw: return "uxtw";
53 }
54 }
55
56 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
57 switch (Op) {
58     default: llvm_unreachable("Unknown shift opc!");
59 case ARM_AM::asr: return 2;
60 case ARM_AM::lsl: return 0;
61 case ARM_AM::lsr: return 1;
62 case ARM_AM::ror: return 3;
63 }
64 }
65
66 enum AMSubMode {
67 bad_am_submode = 0,
68 ia,
69 ib,
70 da,
71 db
72 };
73
74 inline const char *getAMSubModeStr(AMSubMode Mode) {
75 switch (Mode) {
76     default: llvm_unreachable("Unknown addressing sub-mode!");
77 case ARM_AM::ia: return "ia";
78 case ARM_AM::ib: return "ib";
79 case ARM_AM::da: return "da";
80 case ARM_AM::db: return "db";
81 }
82 }
83
84 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
85 ///
86 inline unsigned rotr32(unsigned Val, unsigned Amt) {
87     assert(Amt < 32 && "Invalid rotate amount");
88 return (Val >> Amt) | (Val << ((32-Amt)&31));
89 }
90
91 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
92 ///
93 inline unsigned rotl32(unsigned Val, unsigned Amt) {
94     assert(Amt < 32 && "Invalid rotate amount");
95 return (Val << Amt) | (Val >> ((32-Amt)&31));
96 }
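
For reference, a hand-worked sketch (not part of the analyzed file; the include path is an assumption) of what the two rotate helpers compute:

// Hypothetical standalone check of rotr32/rotl32 (values worked out by hand).
#include "MCTargetDesc/ARMAddressingModes.h"  // assumed include path
#include <cassert>

void checkRotates() {
  assert(llvm::ARM_AM::rotr32(0x000000F0u, 4) == 0x0000000Fu); // right rotate by 4
  assert(llvm::ARM_AM::rotl32(0x0000000Fu, 4) == 0x000000F0u); // left rotate by 4
  assert(llvm::ARM_AM::rotr32(0x00000001u, 4) == 0x10000000u); // low bit wraps to the top
}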
97
98 //===--------------------------------------------------------------------===//
99 // Addressing Mode #1: shift_operand with registers
100 //===--------------------------------------------------------------------===//
101 //
102 // This 'addressing mode' is used for arithmetic instructions. It can
103 // represent things like:
104 // reg
105 // reg [asr|lsl|lsr|ror|rrx] reg
106 // reg [asr|lsl|lsr|ror|rrx] imm
107 //
108   // This is stored as three operands [rega, regb, opc]. The first is the base
109   // reg, the second is the shift amount (or reg0 if not present or an imm). The
110 // third operand encodes the shift opcode and the imm if a reg isn't present.
111 //
112 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
113 return ShOp | (Imm << 3);
114 }
115 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
116 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
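
As a hand-worked illustration of this packing (same assumed includes as the rotr32/rotl32 sketch), encoding an "lsl #4" shifter operand and decoding it back:

void checkSORegOpc() {
  unsigned Packed = llvm::ARM_AM::getSORegOpc(llvm::ARM_AM::lsl, 4); // 2 | (4 << 3) == 0x22
  assert(llvm::ARM_AM::getSORegOffset(Packed) == 4);                 // immediate recovered
  assert(llvm::ARM_AM::getSORegShOp(Packed) == llvm::ARM_AM::lsl);   // shift opcode recovered
}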
117
118 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
119 /// the 8-bit imm value.
120 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
121 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
122 /// the rotate amount.
123 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
124
125 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
126 /// computing the rotate amount to use. If this immediate value cannot be
127 /// handled with a single shifter-op, determine a good rotate amount that will
128 /// take a maximal chunk of bits out of the immediate.
129 inline unsigned getSOImmValRotate(unsigned Imm) {
130 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
131 // of zero.
132 if ((Imm & ~255U) == 0) return 0;
133
134 // Use CTZ to compute the rotate amount.
135 unsigned TZ = countTrailingZeros(Imm);
136
137 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
138 // not 9.
139 unsigned RotAmt = TZ & ~1;
140
141 // If we can handle this spread, return it.
142 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
143 return (32-RotAmt)&31; // HW rotates right, not left.
144
145 // For values like 0xF000000F, we should ignore the low 6 bits, then
146 // retry the hunt.
147 if (Imm & 63U) {
148 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
149 unsigned RotAmt2 = TZ2 & ~1;
150 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
151 return (32-RotAmt2)&31; // HW rotates right, not left.
152 }
153
154 // Otherwise, we have no way to cover this span of bits with a single
155 // shifter_op immediate. Return a chunk of bits that will be useful to
156 // handle.
157 return (32-RotAmt)&31; // HW rotates right, not left.
158 }
159
160   /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
161   /// into a shifter_operand immediate operand, return the 12-bit encoding for
162   /// it. If not, return -1.
163 inline int getSOImmVal(unsigned Arg) {
164 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
165 // of zero.
166 if ((Arg & ~255U) == 0) return Arg;
167
168 unsigned RotAmt = getSOImmValRotate(Arg);
169
170 // If this cannot be handled with a single shifter_op, bail out.
171 if (rotr32(~255U, RotAmt) & Arg)
172 return -1;
173
174 // Encode this correctly.
175 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
176 }
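
A hand-worked example of this encoding (same assumed includes as the earlier sketches): 0x00AB0000 is 0xAB rotated right by 16, so it encodes as imm8 = 0xAB with rotate field 8.

void checkSOImmVal() {
  // 0x00AB0000 == ror(0xAB, 16); the encoded rotate field is 16/2 == 8.
  assert(llvm::ARM_AM::getSOImmVal(0x00AB0000u) == 0x8AB);
  // Decoding the 12-bit value recovers the original immediate.
  assert(llvm::ARM_AM::rotr32(llvm::ARM_AM::getSOImmValImm(0x8ABu),
                              llvm::ARM_AM::getSOImmValRot(0x8ABu)) == 0x00AB0000u);
  // A value needing more than one shifter_op reports failure.
  assert(llvm::ARM_AM::getSOImmVal(0x0000F00Fu) == -1);
}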
177
178 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
179 /// or'ing together two SOImmVal's.
180 inline bool isSOImmTwoPartVal(unsigned V) {
181 // If this can be handled with a single shifter_op, bail out.
182 V = rotr32(~255U, getSOImmValRotate(V)) & V;
183 if (V == 0)
184 return false;
185
186 // If this can be handled with two shifter_op's, accept.
187 V = rotr32(~255U, getSOImmValRotate(V)) & V;
188 return V == 0;
189 }
190
191 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
192 /// return the first chunk of it.
193 inline unsigned getSOImmTwoPartFirst(unsigned V) {
194 return rotr32(255U, getSOImmValRotate(V)) & V;
195 }
196
197 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
198 /// return the second chunk of it.
199 inline unsigned getSOImmTwoPartSecond(unsigned V) {
200 // Mask out the first hunk.
201 V = rotr32(~255U, getSOImmValRotate(V)) & V;
202
203 // Take what's left.
204     assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
205 return V;
206 }
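
A hand-worked two-part example (same assumed includes): 0x0000F00F cannot be covered by a single shifter_op immediate, but it splits into 0x0F and 0xF000, each of which encodes on its own.

void checkTwoPartSOImm() {
  assert(llvm::ARM_AM::isSOImmTwoPartVal(0x0000F00Fu));
  unsigned First  = llvm::ARM_AM::getSOImmTwoPartFirst(0x0000F00Fu);  // 0x0000000F
  unsigned Second = llvm::ARM_AM::getSOImmTwoPartSecond(0x0000F00Fu); // 0x0000F000
  assert((First | Second) == 0x0000F00Fu);          // the two chunks rebuild the value
  assert(llvm::ARM_AM::getSOImmVal(First) != -1 &&  // and each chunk is encodable
         llvm::ARM_AM::getSOImmVal(Second) != -1);
}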
207
208   /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
209   /// by a left shift. Returns the shift amount to use.
210 inline unsigned getThumbImmValShift(unsigned Imm) {
211 // 8-bit (or less) immediates are trivially immediate operand with a shift
212 // of zero.
213 if ((Imm & ~255U) == 0) return 0;
20. Assuming the condition is false
21. Taking false branch
214
215 // Use CTZ to compute the shift amount.
216 return countTrailingZeros(Imm);
22. Calling 'countTrailingZeros<unsigned int>'
29. Returning from 'countTrailingZeros<unsigned int>'
30. Returning the value 32
217 }
218
219   /// isThumbImmShiftedVal - Return true if the specified value can be obtained
220   /// by left shifting an 8-bit immediate.
221 inline bool isThumbImmShiftedVal(unsigned V) {
222     // If this can be handled with a left-shifted 8-bit immediate, the mask leaves zero.
223 V = (~255U << getThumbImmValShift(V)) & V;
19. Calling 'getThumbImmValShift'
31. Returning from 'getThumbImmValShift'
32. The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
224 return V == 0;
225 }
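
The flagged path (steps 19-32) only materializes when countTrailingZeros returns 32, i.e. for a zero input, and the (Imm & ~255U) check in getThumbImmValShift already returns early for every value below 256, so the shift-by-32 appears unreachable for real inputs. A defensive variant that would also satisfy the checker might look like the following sketch (hypothetical, not the code in the tree; same assumed includes as the earlier sketches):

// Hypothetical guarded variant of isThumbImmShiftedVal (not the upstream code).
inline bool isThumbImmShiftedVal_guarded(unsigned V) {
  unsigned Shift = llvm::ARM_AM::getThumbImmValShift(V);
  if (Shift >= 32)                       // only countTrailingZeros(0) can produce this
    return V == 0;
  return ((~255U << Shift) & V) == 0;    // same test as the original for Shift < 32
}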
226
227 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
228 /// by a left shift. Returns the shift amount to use.
229 inline unsigned getThumbImm16ValShift(unsigned Imm) {
230 // 16-bit (or less) immediates are trivially immediate operand with a shift
231 // of zero.
232 if ((Imm & ~65535U) == 0) return 0;
233
234 // Use CTZ to compute the shift amount.
235 return countTrailingZeros(Imm);
236 }
237
238 /// isThumbImm16ShiftedVal - Return true if the specified value can be
239 /// obtained by left shifting a 16-bit immediate.
240 inline bool isThumbImm16ShiftedVal(unsigned V) {
241     // If this can be handled with a left-shifted 16-bit immediate, the mask leaves zero.
242 V = (~65535U << getThumbImm16ValShift(V)) & V;
243 return V == 0;
244 }
245
246   /// getThumbImmNonShiftedVal - If V is a value that satisfies
247   /// isThumbImmShiftedVal, return the non-shifted value.
248 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
249 return V >> getThumbImmValShift(V);
250 }
251
252
253 /// getT2SOImmValSplat - Return the 12-bit encoded representation
254 /// if the specified value can be obtained by splatting the low 8 bits
255 /// into every other byte or every byte of a 32-bit value. i.e.,
256 /// 00000000 00000000 00000000 abcdefgh control = 0
257 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
258 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
259 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
260 /// Return -1 if none of the above apply.
261 /// See ARM Reference Manual A6.3.2.
262 inline int getT2SOImmValSplatVal(unsigned V) {
263 unsigned u, Vs, Imm;
264 // control = 0
265 if ((V & 0xffffff00) == 0)
266 return V;
267
268 // If the value is zeroes in the first byte, just shift those off
269 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
270 // Any passing value only has 8 bits of payload, splatted across the word
271 Imm = Vs & 0xff;
272 // Likewise, any passing values have the payload splatted into the 3rd byte
273 u = Imm | (Imm << 16);
274
275 // control = 1 or 2
276 if (Vs == u)
277 return (((Vs == V) ? 1 : 2) << 8) | Imm;
278
279 // control = 3
280 if (Vs == (u | (u << 8)))
281 return (3 << 8) | Imm;
282
283 return -1;
284 }
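
Hand-worked splat encodings (same assumed includes) matching the control values listed in the comment above:

void checkT2SplatVal() {
  assert(llvm::ARM_AM::getT2SOImmValSplatVal(0x000000ABu) == 0x0AB); // control = 0
  assert(llvm::ARM_AM::getT2SOImmValSplatVal(0x00AB00ABu) == 0x1AB); // control = 1
  assert(llvm::ARM_AM::getT2SOImmValSplatVal(0xAB00AB00u) == 0x2AB); // control = 2
  assert(llvm::ARM_AM::getT2SOImmValSplatVal(0xABABABABu) == 0x3AB); // control = 3
  assert(llvm::ARM_AM::getT2SOImmValSplatVal(0x00AB0000u) == -1);    // not a splat pattern
}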
285
286 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
287 /// specified value is a rotated 8-bit value. Return -1 if no rotation
288 /// encoding is possible.
289 /// See ARM Reference Manual A6.3.2.
290 inline int getT2SOImmValRotateVal(unsigned V) {
291 unsigned RotAmt = countLeadingZeros(V);
292 if (RotAmt >= 24)
293 return -1;
294
295 // If 'Arg' can be handled with a single shifter_op return the value.
296 if ((rotr32(0xff000000U, RotAmt) & V) == V)
297 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
298
299 return -1;
300 }
301
302 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
303 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
304 /// encoding for it. If not, return -1.
305 /// See ARM Reference Manual A6.3.2.
306 inline int getT2SOImmVal(unsigned Arg) {
307 // If 'Arg' is an 8-bit splat, then get the encoded value.
308 int Splat = getT2SOImmValSplatVal(Arg);
309 if (Splat != -1)
310 return Splat;
311
312 // If 'Arg' can be handled with a single shifter_op return the value.
313 int Rot = getT2SOImmValRotateVal(Arg);
314 if (Rot != -1)
315 return Rot;
316
317 return -1;
318 }
319
320 inline unsigned getT2SOImmValRotate(unsigned V) {
321 if ((V & ~255U) == 0) return 0;
322 // Use CTZ to compute the rotate amount.
323 unsigned RotAmt = countTrailingZeros(V);
324 return (32 - RotAmt) & 31;
325 }
326
327 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
328 unsigned V = Imm;
329 // Passing values can be any combination of splat values and shifter
330 // values. If this can be handled with a single shifter or splat, bail
331 // out. Those should be handled directly, not with a two-part val.
332 if (getT2SOImmValSplatVal(V) != -1)
333 return false;
334 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
335 if (V == 0)
336 return false;
337
338 // If this can be handled as an immediate, accept.
339 if (getT2SOImmVal(V) != -1) return true;
340
341 // Likewise, try masking out a splat value first.
342 V = Imm;
343 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
344 V &= ~0xff00ff00U;
345 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
346 V &= ~0x00ff00ffU;
347 // If what's left can be handled as an immediate, accept.
348 if (getT2SOImmVal(V) != -1) return true;
349
350 // Otherwise, do not accept.
351 return false;
352 }
353
354 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
355     assert (isT2SOImmTwoPartVal(Imm) &&
356             "Immediate cannot be encoded as two part immediate!");
357 // Try a shifter operand as one part
358 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
359 // If the rest is encodable as an immediate, then return it.
360 if (getT2SOImmVal(V) != -1) return V;
361
362 // Try masking out a splat value first.
363 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
364 return Imm & 0xff00ff00U;
365
366 // The other splat is all that's left as an option.
367     assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
368 return Imm & 0x00ff00ffU;
369 }
370
371 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
372 // Mask out the first hunk
373 Imm ^= getT2SOImmTwoPartFirst(Imm);
374 // Return what's left
375     assert (getT2SOImmVal(Imm) != -1 &&
376             "Unable to encode second part of T2 two part SO immediate");
377 return Imm;
378 }
379
380
381 //===--------------------------------------------------------------------===//
382 // Addressing Mode #2
383 //===--------------------------------------------------------------------===//
384 //
385 // This is used for most simple load/store instructions.
386 //
387 // addrmode2 := reg +/- reg shop imm
388 // addrmode2 := reg +/- imm12
389 //
390 // The first operand is always a Reg. The second operand is a reg if in
391 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
392 // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
393 // fourth operand 16-17 encodes the index mode.
394 //
395 // If this addressing mode is a frame index (before prolog/epilog insertion
396 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
397 // with no shift amount for the frame offset.
398 //
399 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
400 unsigned IdxMode = 0) {
401     assert(Imm12 < (1 << 12) && "Imm too large!");
402 bool isSub = Opc == sub;
403 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
404 }
405 inline unsigned getAM2Offset(unsigned AM2Opc) {
406 return AM2Opc & ((1 << 12)-1);
407 }
408 inline AddrOpc getAM2Op(unsigned AM2Opc) {
409 return ((AM2Opc >> 12) & 1) ? sub : add;
410 }
411 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
412 return (ShiftOpc)((AM2Opc >> 13) & 7);
413 }
414 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
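
A hand-worked round trip of the addrmode2 packing described above (same assumed includes):

void checkAM2() {
  // Offset 4, subtract, lsl shift, default index mode.
  unsigned Opc = llvm::ARM_AM::getAM2Opc(llvm::ARM_AM::sub, 4, llvm::ARM_AM::lsl); // 0x5004
  assert(llvm::ARM_AM::getAM2Offset(Opc) == 4);
  assert(llvm::ARM_AM::getAM2Op(Opc) == llvm::ARM_AM::sub);
  assert(llvm::ARM_AM::getAM2ShiftOpc(Opc) == llvm::ARM_AM::lsl);
  assert(llvm::ARM_AM::getAM2IdxMode(Opc) == 0);
}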
415
416 //===--------------------------------------------------------------------===//
417 // Addressing Mode #3
418 //===--------------------------------------------------------------------===//
419 //
420 // This is used for sign-extending loads, and load/store-pair instructions.
421 //
422 // addrmode3 := reg +/- reg
423 // addrmode3 := reg +/- imm8
424 //
425 // The first operand is always a Reg. The second operand is a reg if in
426 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
427 // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
428 // index mode.
429
430 /// getAM3Opc - This function encodes the addrmode3 opc field.
431 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
432 unsigned IdxMode = 0) {
433 bool isSub = Opc == sub;
434 return ((int)isSub << 8) | Offset | (IdxMode << 9);
435 }
436 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
437 inline AddrOpc getAM3Op(unsigned AM3Opc) {
438 return ((AM3Opc >> 8) & 1) ? sub : add;
439 }
440 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
441
442 //===--------------------------------------------------------------------===//
443 // Addressing Mode #4
444 //===--------------------------------------------------------------------===//
445 //
446 // This is used for load / store multiple instructions.
447 //
448 // addrmode4 := reg, <mode>
449 //
450 // The four modes are:
451 // IA - Increment after
452 // IB - Increment before
453 // DA - Decrement after
454 // DB - Decrement before
455 // For VFP instructions, only the IA and DB modes are valid.
456
457 inline AMSubMode getAM4SubMode(unsigned Mode) {
458 return (AMSubMode)(Mode & 0x7);
459 }
460
461 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
462
463 //===--------------------------------------------------------------------===//
464 // Addressing Mode #5
465 //===--------------------------------------------------------------------===//
466 //
467 // This is used for coprocessor instructions, such as FP load/stores.
468 //
469 // addrmode5 := reg +/- imm8*4
470 //
471 // The first operand is always a Reg. The second operand encodes the
472 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
473
474 /// getAM5Opc - This function encodes the addrmode5 opc field.
475 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
476 bool isSub = Opc == sub;
477 return ((int)isSub << 8) | Offset;
478 }
479 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
480 inline AddrOpc getAM5Op(unsigned AM5Opc) {
481 return ((AM5Opc >> 8) & 1) ? sub : add;
482 }
483
484 //===--------------------------------------------------------------------===//
485 // Addressing Mode #5 FP16
486 //===--------------------------------------------------------------------===//
487 //
488 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
489 //
490 // addrmode5fp16 := reg +/- imm8*2
491 //
492 // The first operand is always a Reg. The second operand encodes the
493 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
494
495 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
496 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
497 bool isSub = Opc == sub;
498 return ((int)isSub << 8) | Offset;
499 }
500 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
501 return AM5Opc & 0xFF;
502 }
503 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
504 return ((AM5Opc >> 8) & 1) ? sub : add;
505 }
506
507 //===--------------------------------------------------------------------===//
508 // Addressing Mode #6
509 //===--------------------------------------------------------------------===//
510 //
511 // This is used for NEON load / store instructions.
512 //
513 // addrmode6 := reg with optional alignment
514 //
515 // This is stored in two operands [regaddr, align]. The first is the
516 // address register. The second operand is the value of the alignment
517 // specifier in bytes or zero if no explicit alignment.
518 // Valid alignments depend on the specific instruction.
519
520 //===--------------------------------------------------------------------===//
521 // NEON/MVE Modified Immediates
522 //===--------------------------------------------------------------------===//
523 //
524 // Several NEON and MVE instructions (e.g., VMOV) take a "modified immediate"
525 // vector operand, where a small immediate encoded in the instruction
526 // specifies a full NEON vector value. These modified immediates are
527 // represented here as encoded integers. The low 8 bits hold the immediate
528 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
529 // the "Cmode" field of the instruction. The interfaces below treat the
530 // Op and Cmode values as a single 5-bit value.
531
532 inline unsigned createVMOVModImm(unsigned OpCmode, unsigned Val) {
533 return (OpCmode << 8) | Val;
534 }
535 inline unsigned getVMOVModImmOpCmode(unsigned ModImm) {
536 return (ModImm >> 8) & 0x1f;
537 }
538 inline unsigned getVMOVModImmVal(unsigned ModImm) { return ModImm & 0xff; }
539
540 /// decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the
541 /// element value and the element size in bits. (If the element size is
542 /// smaller than the vector, it is splatted into all the elements.)
543 inline uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits) {
544 unsigned OpCmode = getVMOVModImmOpCmode(ModImm);
545 unsigned Imm8 = getVMOVModImmVal(ModImm);
546 uint64_t Val = 0;
547
548 if (OpCmode == 0xe) {
549 // 8-bit vector elements
550 Val = Imm8;
551 EltBits = 8;
552 } else if ((OpCmode & 0xc) == 0x8) {
553 // 16-bit vector elements
554 unsigned ByteNum = (OpCmode & 0x6) >> 1;
555 Val = Imm8 << (8 * ByteNum);
556 EltBits = 16;
557 } else if ((OpCmode & 0x8) == 0) {
558 // 32-bit vector elements, zero with one byte set
559 unsigned ByteNum = (OpCmode & 0x6) >> 1;
560 Val = Imm8 << (8 * ByteNum);
561 EltBits = 32;
562 } else if ((OpCmode & 0xe) == 0xc) {
563 // 32-bit vector elements, one byte with low bits set
564 unsigned ByteNum = 1 + (OpCmode & 0x1);
565 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
566 EltBits = 32;
567 } else if (OpCmode == 0x1e) {
568 // 64-bit vector elements
569 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
570 if ((ModImm >> ByteNum) & 1)
571 Val |= (uint64_t)0xff << (8 * ByteNum);
572 }
573 EltBits = 64;
574 } else {
575 llvm_unreachable("Unsupported VMOV immediate")::llvm::llvm_unreachable_internal("Unsupported VMOV immediate"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h"
, 575)
;
576 }
577 return Val;
578 }
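
Two hand-worked decodes (same assumed includes) showing how the Op/Cmode bits select the element size and the byte position of the payload:

void checkVMOVModImm() {
  unsigned EltBits = 0;
  // OpCmode 0xe: plain 8-bit element.
  assert(llvm::ARM_AM::decodeVMOVModImm(
             llvm::ARM_AM::createVMOVModImm(0xe, 0x55), EltBits) == 0x55 && EltBits == 8);
  // OpCmode 0x2: 32-bit element with the byte placed in bits 15..8.
  assert(llvm::ARM_AM::decodeVMOVModImm(
             llvm::ARM_AM::createVMOVModImm(0x2, 0xAB), EltBits) == 0xAB00 && EltBits == 32);
}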
579
580 // Generic validation for single-byte immediate (0X00, 00X0, etc).
581 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
582     assert(Size >= 1 && Size <= 4 && "Invalid size");
583 unsigned count = 0;
584 for (unsigned i = 0; i < Size; ++i) {
585 if (Value & 0xff) count++;
586 Value >>= 8;
587 }
588 return count == 1;
589 }
590
591 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
592 inline bool isNEONi16splat(unsigned Value) {
593 if (Value > 0xffff)
594 return false;
595 // i16 value with set bits only in one byte X0 or 0X.
596 return Value == 0 || isNEONBytesplat(Value, 2);
597 }
598
599   // Encode a NEON 16-bit splat immediate for instructions like VBIC/VORR.
600 inline unsigned encodeNEONi16splat(unsigned Value) {
601     assert(isNEONi16splat(Value) && "Invalid NEON splat value");
602 if (Value >= 0x100)
603 Value = (Value >> 8) | 0xa00;
604 else
605 Value |= 0x800;
606 return Value;
607 }
608
609 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
610 inline bool isNEONi32splat(unsigned Value) {
611 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
612 return Value == 0 || isNEONBytesplat(Value, 4);
613 }
614
615   /// Encode a NEON 32-bit splat immediate for instructions like VBIC/VORR.
616 inline unsigned encodeNEONi32splat(unsigned Value) {
617     assert(isNEONi32splat(Value) && "Invalid NEON splat value");
618 if (Value >= 0x100 && Value <= 0xff00)
619 Value = (Value >> 8) | 0x200;
620 else if (Value > 0xffff && Value <= 0xff0000)
621 Value = (Value >> 16) | 0x400;
622 else if (Value > 0xffffff)
623 Value = (Value >> 24) | 0x600;
624 return Value;
625 }
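
Hand-worked splat encodings for the i16/i32 helpers (same assumed includes):

void checkNEONSplat() {
  assert(llvm::ARM_AM::isNEONi16splat(0x2000u));               // set bits only in the high byte
  assert(llvm::ARM_AM::encodeNEONi16splat(0x2000u) == 0xA20u); // (0x2000 >> 8) | 0xa00
  assert(llvm::ARM_AM::isNEONi16splat(0x0041u));
  assert(llvm::ARM_AM::encodeNEONi16splat(0x0041u) == 0x841u); // 0x41 | 0x800
  assert(llvm::ARM_AM::isNEONi32splat(0x00FF0000u));
  assert(llvm::ARM_AM::encodeNEONi32splat(0x00FF0000u) == 0x4FFu); // (val >> 16) | 0x400
}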
626
627 //===--------------------------------------------------------------------===//
628 // Floating-point Immediates
629 //
630 inline float getFPImmFloat(unsigned Imm) {
631 // We expect an 8-bit binary encoding of a floating-point number here.
632
633 uint8_t Sign = (Imm >> 7) & 0x1;
634 uint8_t Exp = (Imm >> 4) & 0x7;
635 uint8_t Mantissa = Imm & 0xf;
636
637 // 8-bit FP IEEE Float Encoding
638 // abcd efgh aBbbbbbc defgh000 00000000 00000000
639 //
640 // where B = NOT(b);
641 uint32_t I = 0;
642 I |= Sign << 31;
643 I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
644 I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
645 I |= (Exp & 0x3) << 23;
646 I |= Mantissa << 19;
647 return bit_cast<float>(I);
648 }
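
Hand-worked decodes of the 8-bit FP encoding above (same assumed includes): 0x70 expands to the IEEE-754 pattern 0x3F800000 and 0xF0 to 0xBF800000.

void checkFPImmFloat() {
  assert(llvm::ARM_AM::getFPImmFloat(0x70) == 1.0f);  // sign 0, exp field 7, mantissa 0
  assert(llvm::ARM_AM::getFPImmFloat(0xF0) == -1.0f); // same, with the sign bit set
}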
649
650 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
651 /// floating-point value. If the value cannot be represented as an 8-bit
652 /// floating-point value, then return -1.
653 inline int getFP16Imm(const APInt &Imm) {
654 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
655 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
656 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
657
658 // We can handle 4 bits of mantissa.
659 // mantissa = (16+UInt(e:f:g:h))/16.
660 if (Mantissa & 0x3f)
661 return -1;
662 Mantissa >>= 6;
663
664 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
665 if (Exp < -3 || Exp > 4)
666 return -1;
667 Exp = ((Exp+3) & 0x7) ^ 4;
668
669 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
670 }
671
672 inline int getFP16Imm(const APFloat &FPImm) {
673 return getFP16Imm(FPImm.bitcastToAPInt());
674 }
675
676 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
677 /// floating-point value. If the value cannot be represented as an 8-bit
678 /// floating-point value, then return -1.
679 inline int getFP32Imm(const APInt &Imm) {
680 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
681 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
682 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
683
684 // We can handle 4 bits of mantissa.
685 // mantissa = (16+UInt(e:f:g:h))/16.
686 if (Mantissa & 0x7ffff)
687 return -1;
688 Mantissa >>= 19;
689 if ((Mantissa & 0xf) != Mantissa)
690 return -1;
691
692 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
693 if (Exp < -3 || Exp > 4)
694 return -1;
695 Exp = ((Exp+3) & 0x7) ^ 4;
696
697 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
698 }
699
700 inline int getFP32Imm(const APFloat &FPImm) {
701 return getFP32Imm(FPImm.bitcastToAPInt());
702 }
703
704 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
705 /// floating-point value. If the value cannot be represented as an 8-bit
706 /// floating-point value, then return -1.
707 inline int getFP64Imm(const APInt &Imm) {
708 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
709 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
710 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
711
712 // We can handle 4 bits of mantissa.
713 // mantissa = (16+UInt(e:f:g:h))/16.
714 if (Mantissa & 0xffffffffffffULL)
715 return -1;
716 Mantissa >>= 48;
717 if ((Mantissa & 0xf) != Mantissa)
718 return -1;
719
720 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
721 if (Exp < -3 || Exp > 4)
722 return -1;
723 Exp = ((Exp+3) & 0x7) ^ 4;
724
725 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
726 }
727
728 inline int getFP64Imm(const APFloat &FPImm) {
729 return getFP64Imm(FPImm.bitcastToAPInt());
730 }
731
732} // end namespace ARM_AM
733} // end namespace llvm
734
735#endif
736

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/SwapByteOrder.h"
18#include <algorithm>
19#include <cassert>
20#include <climits>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB
23.1
'ZB' is not equal to ZB_Undefined
23.1
'ZB' is not equal to ZB_Undefined
23.1
'ZB' is not equal to ZB_Undefined
!= ZB_Undefined && Val == 0)
24
Assuming 'Val' is equal to 0
25
Taking true branch
117 return 32;
26
Returning the value 32
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
23. Calling 'TrailingZerosCounter::count'
27. Returning from 'TrailingZerosCounter::count'
28. Returning the value 32
161}
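
For reference, a small sketch (not part of the analyzed file) of the ZB_Width behaviour that drives the report: a zero input yields the full word width, 32, which is exactly the value the analyzer propagates into the shift at ARMAddressingModes.h line 223.

// Hypothetical standalone check of countTrailingZeros (hand-evaluated).
#include "llvm/Support/MathExtras.h"
#include <cassert>

void checkCTZ() {
  assert(llvm::countTrailingZeros(0x80u) == 7u);        // lowest set bit is bit 7
  assert(llvm::countTrailingZeros(0x00AB0000u) == 16u); // as used by the SO-immediate helpers
  assert(llvm::countTrailingZeros(0u) == 32u);          // ZB_Width default: returns the type width
}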
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250  const unsigned Bits = CHAR_BIT * sizeof(T);
251  assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258  return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264  return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270  return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
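
Hand-worked values for the mask helpers (same assumed includes as the countTrailingZeros sketch, plus <cstdint>):

#include <cstdint>

void checkMasks() {
  assert(llvm::maskTrailingOnes<uint32_t>(8)  == 0x000000FFu);
  assert(llvm::maskLeadingOnes<uint32_t>(8)   == 0xFF000000u);
  assert(llvm::maskTrailingZeros<uint32_t>(8) == 0xFFFFFF00u);
  assert(llvm::maskTrailingOnes<uint32_t>(0)  == 0u); // N == 0 avoids a full-width shift
}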
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315// NOTE: The following support functions use the _32/_64 extensions instead of
316// type overloading so that signed and unsigned integers can be used without
317// ambiguity.
318
319/// Return the high 32 bits of a 64 bit value.
320constexpr inline uint32_t Hi_32(uint64_t Value) {
321 return static_cast<uint32_t>(Value >> 32);
322}
323
324/// Return the low 32 bits of a 64 bit value.
325constexpr inline uint32_t Lo_32(uint64_t Value) {
326 return static_cast<uint32_t>(Value);
327}
328
329/// Make a 64-bit integer from a high / low pair of 32-bit integers.
330constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
331 return ((uint64_t)High << 32) | (uint64_t)Low;
332}
333
334/// Checks if an integer fits into the given bit width.
335template <unsigned N> constexpr inline bool isInt(int64_t x) {
336  return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
337}
338// Template specializations to get better code for common cases.
339template <> constexpr inline bool isInt<8>(int64_t x) {
340 return static_cast<int8_t>(x) == x;
341}
342template <> constexpr inline bool isInt<16>(int64_t x) {
343 return static_cast<int16_t>(x) == x;
344}
345template <> constexpr inline bool isInt<32>(int64_t x) {
346 return static_cast<int32_t>(x) == x;
347}
348
349/// Checks if a signed integer is an N bit number shifted left by S.
350template <unsigned N, unsigned S>
351constexpr inline bool isShiftedInt(int64_t x) {
352 static_assert(
353 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
354 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
355  return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
356}
357
358/// Checks if an unsigned integer fits into the given bit width.
359///
360/// This is written as two functions rather than as simply
361///
362/// return N >= 64 || X < (UINT64_C(1) << N);
363///
364/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
365/// left too many places.
366template <unsigned N>
367constexpr inline typename std::enable_if<(N < 64), bool>::type
368isUInt(uint64_t X) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370  return X < (UINT64_C(1) << (N));
371}
372template <unsigned N>
373constexpr inline typename std::enable_if<N >= 64, bool>::type
374isUInt(uint64_t X) {
375 return true;
376}
377
378// Template specializations to get better code for common cases.
379template <> constexpr inline bool isUInt<8>(uint64_t x) {
380 return static_cast<uint8_t>(x) == x;
381}
382template <> constexpr inline bool isUInt<16>(uint64_t x) {
383 return static_cast<uint16_t>(x) == x;
384}
385template <> constexpr inline bool isUInt<32>(uint64_t x) {
386 return static_cast<uint32_t>(x) == x;
387}
388
389/// Checks if an unsigned integer is an N bit number shifted left by S.
390template <unsigned N, unsigned S>
391constexpr inline bool isShiftedUInt(uint64_t x) {
392 static_assert(
393 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
394 static_assert(N + S <= 64,
395 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
396 // Per the two static_asserts above, S must be strictly less than 64. So
397 // 1 << S is not undefined behavior.
398  return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
399}
400
401/// Gets the maximum value for a N-bit unsigned integer.
402inline uint64_t maxUIntN(uint64_t N) {
403  assert(N > 0 && N <= 64 && "integer width out of range");
404
405 // uint64_t(1) << 64 is undefined behavior, so we can't do
406 // (uint64_t(1) << N) - 1
407 // without checking first that N != 64. But this works and doesn't have a
408 // branch.
409  return UINT64_MAX >> (64 - N);
410}
411
412/// Gets the minimum value for a N-bit signed integer.
413inline int64_t minIntN(int64_t N) {
414  assert(N > 0 && N <= 64 && "integer width out of range");
415
416  return -(UINT64_C(1) << (N - 1));
417}
418
419/// Gets the maximum value for a N-bit signed integer.
420inline int64_t maxIntN(int64_t N) {
421  assert(N > 0 && N <= 64 && "integer width out of range");
422
423 // This relies on two's complement wraparound when N == 64, so we convert to
424 // int64_t only at the very end to avoid UB.
425  return (UINT64_C(1) << (N - 1)) - 1;
426}
427
428/// Checks if an unsigned integer fits into the given (dynamic) bit width.
429inline bool isUIntN(unsigned N, uint64_t x) {
430 return N >= 64 || x <= maxUIntN(N);
431}
432
433/// Checks if a signed integer fits into the given (dynamic) bit width.
434inline bool isIntN(unsigned N, int64_t x) {
435 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
436}
437
438/// Return true if the argument is a non-empty sequence of ones starting at the
439/// least significant bit with the remainder zero (32 bit version).
440/// Ex. isMask_32(0x0000FFFFU) == true.
441constexpr inline bool isMask_32(uint32_t Value) {
442 return Value && ((Value + 1) & Value) == 0;
443}
444
445/// Return true if the argument is a non-empty sequence of ones starting at the
446/// least significant bit with the remainder zero (64 bit version).
447constexpr inline bool isMask_64(uint64_t Value) {
448 return Value && ((Value + 1) & Value) == 0;
449}
450
451/// Return true if the argument contains a non-empty sequence of ones with the
452/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
453constexpr inline bool isShiftedMask_32(uint32_t Value) {
454 return Value && isMask_32((Value - 1) | Value);
455}
456
457/// Return true if the argument contains a non-empty sequence of ones with the
458/// remainder zero (64 bit version.)
459constexpr inline bool isShiftedMask_64(uint64_t Value) {
460 return Value && isMask_64((Value - 1) | Value);
461}
462
463/// Return true if the argument is a power of two > 0.
464/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
465constexpr inline bool isPowerOf2_32(uint32_t Value) {
466 return Value && !(Value & (Value - 1));
467}
468
469/// Return true if the argument is a power of two > 0 (64 bit edition.)
470constexpr inline bool isPowerOf2_64(uint64_t Value) {
471 return Value && !(Value & (Value - 1));
472}
473
474/// Return a byte-swapped representation of the 16-bit argument.
475inline uint16_t ByteSwap_16(uint16_t Value) {
476 return sys::SwapByteOrder_16(Value);
477}
478
479/// Return a byte-swapped representation of the 32-bit argument.
480inline uint32_t ByteSwap_32(uint32_t Value) {
481 return sys::SwapByteOrder_32(Value);
482}
483
484/// Return a byte-swapped representation of the 64-bit argument.
485inline uint64_t ByteSwap_64(uint64_t Value) {
486 return sys::SwapByteOrder_64(Value);
487}
488
489/// Count the number of ones from the most significant bit to the first
490/// zero bit.
491///
492/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
493/// Only unsigned integral types are allowed.
494///
495/// \param ZB the behavior on an input of all ones. Only ZB_Width and
496/// ZB_Undefined are valid arguments.
497template <typename T>
498unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
499 static_assert(std::numeric_limits<T>::is_integer &&
500 !std::numeric_limits<T>::is_signed,
501 "Only unsigned integral types are allowed.");
502 return countLeadingZeros<T>(~Value, ZB);
503}
504
505/// Count the number of ones from the least significant bit to the first
506/// zero bit.
507///
508/// Ex. countTrailingOnes(0x00FF00FF) == 8.
509/// Only unsigned integral types are allowed.
510///
511/// \param ZB the behavior on an input of all ones. Only ZB_Width and
512/// ZB_Undefined are valid arguments.
513template <typename T>
514unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
515 static_assert(std::numeric_limits<T>::is_integer &&
516 !std::numeric_limits<T>::is_signed,
517 "Only unsigned integral types are allowed.");
518 return countTrailingZeros<T>(~Value, ZB);
519}
520
521namespace detail {
522template <typename T, std::size_t SizeOfT> struct PopulationCounter {
523 static unsigned count(T Value) {
524 // Generic version, forward to 32 bits.
525 static_assert(SizeOfT <= 4, "Not implemented!");
526#if defined(__GNUC__)
527 return __builtin_popcount(Value);
528#else
529 uint32_t v = Value;
530 v = v - ((v >> 1) & 0x55555555);
531 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
532 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
533#endif
534 }
535};
536
537template <typename T> struct PopulationCounter<T, 8> {
538 static unsigned count(T Value) {
539#if defined(__GNUC__)
540 return __builtin_popcountll(Value);
541#else
542 uint64_t v = Value;
543 v = v - ((v >> 1) & 0x5555555555555555ULL);
544 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
545 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
546 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
547#endif
548 }
549};
550} // namespace detail
551
552/// Count the number of set bits in a value.
553/// Ex. countPopulation(0xF000F000) = 8
554/// Returns 0 if the word is zero.
555template <typename T>
556inline unsigned countPopulation(T Value) {
557 static_assert(std::numeric_limits<T>::is_integer &&
558 !std::numeric_limits<T>::is_signed,
559 "Only unsigned integral types are allowed.");
560 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
561}
562
563/// Compile time Log2.
564/// Valid only for positive powers of two.
565template <size_t kValue> constexpr inline size_t CTLog2() {
566 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
567 "Value is not a valid power of 2");
568 return 1 + CTLog2<kValue / 2>();
569}
570
571template <> constexpr inline size_t CTLog2<1>() { return 0; }
572
573/// Return the log base 2 of the specified value.
574inline double Log2(double Value) {
575#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
576 return __builtin_log(Value) / __builtin_log(2.0);
577#else
578 return log2(Value);
579#endif
580}
581
582/// Return the floor log base 2 of the specified value, -1 if the value is zero.
583/// (32 bit edition.)
584/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
585inline unsigned Log2_32(uint32_t Value) {
586 return 31 - countLeadingZeros(Value);
587}
588
589/// Return the floor log base 2 of the specified value, -1 if the value is zero.
590/// (64 bit edition.)
591inline unsigned Log2_64(uint64_t Value) {
592 return 63 - countLeadingZeros(Value);
593}
594
595/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
596/// (32 bit edition).
597/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
598inline unsigned Log2_32_Ceil(uint32_t Value) {
599 return 32 - countLeadingZeros(Value - 1);
600}
601
602/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
603/// (64 bit edition.)
604inline unsigned Log2_64_Ceil(uint64_t Value) {
605 return 64 - countLeadingZeros(Value - 1);
606}
607
608/// Return the greatest common divisor of the values using Euclid's algorithm.
609template <typename T>
610inline T greatestCommonDivisor(T A, T B) {
611 while (B) {
612 T Tmp = B;
613 B = A % B;
614 A = Tmp;
615 }
616 return A;
617}
618
619inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
620 return greatestCommonDivisor<uint64_t>(A, B);
621}
622
623/// This function takes a 64-bit integer and returns the bit equivalent double.
624inline double BitsToDouble(uint64_t Bits) {
625 double D;
626 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
627 memcpy(&D, &Bits, sizeof(Bits));
628 return D;
629}
630
631/// This function takes a 32-bit integer and returns the bit equivalent float.
632inline float BitsToFloat(uint32_t Bits) {
633 float F;
634 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
635 memcpy(&F, &Bits, sizeof(Bits));
636 return F;
637}
638
639/// This function takes a double and returns the bit equivalent 64-bit integer.
640/// Note that copying doubles around changes the bits of NaNs on some hosts,
641/// notably x86, so this routine cannot be used if these bits are needed.
642inline uint64_t DoubleToBits(double Double) {
643 uint64_t Bits;
644 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
645 memcpy(&Bits, &Double, sizeof(Double));
646 return Bits;
647}
648
649/// This function takes a float and returns the bit equivalent 32-bit integer.
650/// Note that copying floats around changes the bits of NaNs on some hosts,
651/// notably x86, so this routine cannot be used if these bits are needed.
652inline uint32_t FloatToBits(float Float) {
653 uint32_t Bits;
654 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
655 memcpy(&Bits, &Float, sizeof(Float));
656 return Bits;
657}
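// The four helpers above are memcpy-based bit casts, so round trips preserve
// the payload (editorial illustration, not in the original header):
//   FloatToBits(1.0f)              == 0x3F800000u
//   DoubleToBits(1.0)              == 0x3FF0000000000000ULL
//   BitsToFloat(FloatToBits(2.5f)) == 2.5f
// (subject to the NaN caveat documented for DoubleToBits/FloatToBits).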
658
659/// A and B are either alignments or offsets. Return the minimum alignment that
660/// may be assumed after adding the two together.
661constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
662 // The largest power of 2 that divides both A and B.
663 //
664 // Replace "-Value" by "1+~Value" in the following commented code to avoid
665 // MSVC warning C4146
666 // return (A | B) & -(A | B);
667 return (A | B) & (1 + ~(A | B));
668}
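// MinAlign returns the largest power of two dividing both arguments; sample
// values (editorial sketch, not in the upstream source):
//   MinAlign(8, 12)  == 4
//   MinAlign(32, 48) == 16
//   MinAlign(16, 16) == 16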
669
670/// Returns the next power of two (in 64-bits) that is strictly greater than A.
671/// Returns zero on overflow.
672inline uint64_t NextPowerOf2(uint64_t A) {
673 A |= (A >> 1);
674 A |= (A >> 2);
675 A |= (A >> 4);
676 A |= (A >> 8);
677 A |= (A >> 16);
678 A |= (A >> 32);
679 return A + 1;
680}
681
682/// Returns the power of two which is less than or equal to the given value.
683/// Essentially, it is a floor operation across the domain of powers of two.
684inline uint64_t PowerOf2Floor(uint64_t A) {
685 if (!A) return 0;
686 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
687}
688
689/// Returns the power of two which is greater than or equal to the given value.
690/// Essentially, it is a ceil operation across the domain of powers of two.
691inline uint64_t PowerOf2Ceil(uint64_t A) {
692 if (!A)
693 return 0;
694 return NextPowerOf2(A - 1);
695}
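// Contrast of the three power-of-two helpers above (editorial illustration,
// not part of the original header):
//   NextPowerOf2(8)  == 16   (strictly greater)
//   PowerOf2Ceil(8)  == 8    (greater or equal)
//   PowerOf2Floor(9) == 8    (less or equal)
//   NextPowerOf2(0) == 1,  PowerOf2Ceil(0) == 0,  PowerOf2Floor(0) == 0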
696
697/// Returns the next integer (mod 2**64) that is greater than or equal to
698/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
699///
700/// If non-zero \p Skew is specified, the return value will be a minimal
701/// integer that is greater than or equal to \p Value and equal to
702/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
703/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
704///
705/// Examples:
706/// \code
707/// alignTo(5, 8) = 8
708/// alignTo(17, 8) = 24
709/// alignTo(~0LL, 8) = 0
710/// alignTo(321, 255) = 510
711///
712/// alignTo(5, 8, 7) = 7
713/// alignTo(17, 8, 1) = 17
714/// alignTo(~0LL, 8, 3) = 3
715/// alignTo(321, 255, 42) = 552
716/// \endcode
717inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
718 assert(Align != 0u && "Align can't be 0.");
719 Skew %= Align;
720 return (Value + Align - 1 - Skew) / Align * Align + Skew;
721}
722
723/// Returns the next integer (mod 2**64) that is greater than or equal to
724/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
725template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
726 static_assert(Align != 0u, "Align must be non-zero");
727 return (Value + Align - 1) / Align * Align;
728}
729
730/// Returns the integer ceil(Numerator / Denominator).
731inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
732 return alignTo(Numerator, Denominator) / Denominator;
733}
734
735/// Returns the integer nearest(Numerator / Denominator).
736inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
737 return (Numerator + (Denominator / 2)) / Denominator;
738}
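// Rounding behaviour of the two division helpers above (editorial sketch,
// not in the upstream source):
//   divideCeil(7, 2)    == 4,  divideCeil(8, 2)    == 4
//   divideNearest(7, 2) == 4,  divideNearest(5, 4) == 1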
739
740/// Returns the largest uint64_t less than or equal to \p Value and is
741/// \p Skew mod \p Align. \p Align must be non-zero.
742inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
743 assert(Align != 0u && "Align can't be 0.");
744 Skew %= Align;
745 return (Value - Skew) / Align * Align + Skew;
746}
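// alignDown mirrors the skewed alignTo above, rounding down instead of up
// (editorial illustration, not part of the original header):
//   alignDown(17, 8)    == 16,  alignTo(17, 8)    == 24
//   alignDown(17, 8, 1) == 17,  alignTo(17, 8, 3) == 19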
747
748/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
749/// Requires 0 < B <= 32.
750template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
751 static_assert(B > 0, "Bit width can't be 0.");
752 static_assert(B <= 32, "Bit width out of range.");
753 return int32_t(X << (32 - B)) >> (32 - B);
754}
755
756/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
757/// Requires 0 < B <= 32.
758inline int32_t SignExtend32(uint32_t X, unsigned B) {
759 assert(B > 0 && "Bit width can't be 0.");
760 assert(B <= 32 && "Bit width out of range.");
761 return int32_t(X << (32 - B)) >> (32 - B);
762}
763
764/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
765/// Requires 0 < B <= 64.
766template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
767 static_assert(B > 0, "Bit width can't be 0.");
768 static_assert(B <= 64, "Bit width out of range.");
769 return int64_t(x << (64 - B)) >> (64 - B);
770}
771
772/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
773/// Requires 0 < B <= 64.
774inline int64_t SignExtend64(uint64_t X, unsigned B) {
775 assert(B > 0 && "Bit width can't be 0.");
776 assert(B <= 64 && "Bit width out of range.");
777 return int64_t(X << (64 - B)) >> (64 - B);
778}
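// The sign bit of the B-bit field decides the result of the SignExtend
// helpers above; sample values (editorial sketch, not in the upstream source):
//   SignExtend32<4>(0x7u) ==  7,  SignExtend32<4>(0x8u) == -8
//   SignExtend32(0xFFu, 8)   == -1
//   SignExtend64(0x80ULL, 8) == -128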
779
780/// Subtract two unsigned integers, X and Y, of type T and return the absolute
781/// value of the result.
782template <typename T>
783typename std::enable_if<std::is_unsigned<T>::value, T>::type
784AbsoluteDifference(T X, T Y) {
785 return std::max(X, Y) - std::min(X, Y);
786}
787
788/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
789/// maximum representable value of T on overflow. ResultOverflowed indicates if
790/// the result is larger than the maximum representable value of type T.
791template <typename T>
792typename std::enable_if<std::is_unsigned<T>::value, T>::type
793SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
794 bool Dummy;
795 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
796 // Hacker's Delight, p. 29
797 T Z = X + Y;
798 Overflowed = (Z < X || Z < Y);
799 if (Overflowed)
800 return std::numeric_limits<T>::max();
801 else
802 return Z;
803}
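// SaturatingAdd clamps to the type's maximum and reports the clamp through
// the out-parameter (editorial illustration, not part of the original header):
//   bool Ovf;
//   SaturatingAdd<uint8_t>(100, 100, &Ovf);  // == 200, Ovf == false
//   SaturatingAdd<uint8_t>(200, 100, &Ovf);  // == 255, Ovf == true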
804
805/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
806/// maximum representable value of T on overflow. ResultOverflowed indicates if
807/// the result is larger than the maximum representable value of type T.
808template <typename T>
809typename std::enable_if<std::is_unsigned<T>::value, T>::type
810SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
811 bool Dummy;
812 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
813
814 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
815 // because it fails for uint16_t (where multiplication can have undefined
816 // behavior due to promotion to int), and requires a division in addition
817 // to the multiplication.
818
819 Overflowed = false;
820
821 // Log2(Z) would be either Log2Z or Log2Z + 1.
822 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
823 // will necessarily be less than Log2Max as desired.
824 int Log2Z = Log2_64(X) + Log2_64(Y);
825 const T Max = std::numeric_limits<T>::max();
826 int Log2Max = Log2_64(Max);
827 if (Log2Z < Log2Max) {
828 return X * Y;
829 }
830 if (Log2Z > Log2Max) {
831 Overflowed = true;
832 return Max;
833 }
834
835 // We're going to use the top bit, and maybe overflow one
836 // bit past it. Multiply all but the bottom bit then add
837 // that on at the end.
838 T Z = (X >> 1) * Y;
839 if (Z & ~(Max >> 1)) {
840 Overflowed = true;
841 return Max;
842 }
843 Z <<= 1;
844 if (X & 1)
845 return SaturatingAdd(Z, Y, ResultOverflowed);
846
847 return Z;
848}
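// The log-based pre-check above settles the clear cases; the borderline
// Log2Z == Log2Max case falls through to the halved multiply. Sample values
// (editorial sketch, not in the upstream source):
//   bool Ovf;
//   SaturatingMultiply<uint8_t>(15, 17, &Ovf);  // == 255, Ovf == false (exact)
//   SaturatingMultiply<uint8_t>(16, 16, &Ovf);  // == 255, Ovf == true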
849
850/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
851/// the product. Clamp the result to the maximum representable value of T on
852/// overflow. ResultOverflowed indicates if the result is larger than the
853/// maximum representable value of type T.
854template <typename T>
855typename std::enable_if<std::is_unsigned<T>::value, T>::type
856SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
857 bool Dummy;
858 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
859
860 T Product = SaturatingMultiply(X, Y, &Overflowed);
861 if (Overflowed)
862 return Product;
863
864 return SaturatingAdd(A, Product, &Overflowed);
865}
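// Sample values for the fused multiply-add above (editorial illustration,
// not part of the original header):
//   bool Ovf;
//   SaturatingMultiplyAdd<uint8_t>(10, 20, 50, &Ovf);  // == 250, Ovf == false
//   SaturatingMultiplyAdd<uint8_t>(10, 20, 60, &Ovf);  // == 255, Ovf == true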
866
867/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
868extern const float huge_valf;
869
870
871/// Add two signed integers, computing the two's complement truncated result,
872/// returning true if overflow occurred.
873template <typename T>
874typename std::enable_if<std::is_signed<T>::value, T>::type
875AddOverflow(T X, T Y, T &Result) {
876#if __has_builtin(__builtin_add_overflow)
877 return __builtin_add_overflow(X, Y, &Result);
878#else
879 // Perform the unsigned addition.
880 using U = typename std::make_unsigned<T>::type;
881 const U UX = static_cast<U>(X);
882 const U UY = static_cast<U>(Y);
883 const U UResult = UX + UY;
884
885 // Convert to signed.
886 Result = static_cast<T>(UResult);
887
888 // Adding two positive numbers should result in a positive number.
889 if (X > 0 && Y > 0)
890 return Result <= 0;
891 // Adding two negatives should result in a negative number.
892 if (X < 0 && Y < 0)
893 return Result >= 0;
894 return false;
895#endif
896}
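// Both the builtin and the fallback path above produce the truncated two's
// complement sum and report overflow (editorial sketch, not in the upstream
// source):
//   int8_t R;
//   AddOverflow<int8_t>(100, 100, R);  // returns true,  R == -56
//   AddOverflow<int8_t>(100, -50, R);  // returns false, R == 50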
897
898/// Subtract two signed integers, computing the two's complement truncated
899/// result, returning true if an overflow occurred.
900template <typename T>
901typename std::enable_if<std::is_signed<T>::value, T>::type
902SubOverflow(T X, T Y, T &Result) {
903#if __has_builtin(__builtin_sub_overflow)
904 return __builtin_sub_overflow(X, Y, &Result);
905#else
906 // Perform the unsigned subtraction.
907 using U = typename std::make_unsigned<T>::type;
908 const U UX = static_cast<U>(X);
909 const U UY = static_cast<U>(Y);
910 const U UResult = UX - UY;
911
912 // Convert to signed.
913 Result = static_cast<T>(UResult);
914
915 // Subtracting a positive number from a non-positive one must give a negative result.
916 if (X <= 0 && Y > 0)
917 return Result >= 0;
918 // Subtracting a negative number from a non-negative one must give a positive result.
919 if (X >= 0 && Y < 0)
920 return Result <= 0;
921 return false;
922#endif
923}
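// Sample values for SubOverflow above (editorial illustration, not part of
// the original header):
//   int32_t R;
//   SubOverflow<int32_t>(INT32_MIN, 1, R);  // returns true,  R == INT32_MAX
//   SubOverflow<int32_t>(10, 25, R);        // returns false, R == -15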
924
925
926/// Multiply two signed integers, computing the two's complement truncated
927/// result, returning true if an overflow occurred.
928template <typename T>
929typename std::enable_if<std::is_signed<T>::value, T>::type
930MulOverflow(T X, T Y, T &Result) {
931 // Perform the unsigned multiplication on absolute values.
932 using U = typename std::make_unsigned<T>::type;
933 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
934 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
935 const U UResult = UX * UY;
936
937 // Convert to signed.
938 const bool IsNegative = (X < 0) ^ (Y < 0);
939 Result = IsNegative ? (0 - UResult) : UResult;
940
941 // If any of the args was 0, result is 0 and no overflow occurs.
942 if (UX == 0 || UY == 0)
943 return false;
944
945 // UX and UY are in [1, 2^n], where n is the number of digits.
946 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
947 // positive) divided by an argument compares to the other.
948 if (IsNegative)
949 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
950 else
951 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
952}
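// The division-based check above compares one operand against Max (or Max + 1
// for a negative product) divided by the other. Sample values (editorial
// sketch, not in the upstream source):
//   int32_t R;
//   MulOverflow<int32_t>(65536, 65536, R);    // returns true  (2^32 does not fit)
//   MulOverflow<int32_t>(-46341, 46341, R);   // returns true  (below INT32_MIN)
//   MulOverflow<int32_t>(-2, 1073741824, R);  // returns false, R == INT32_MIN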
953
954} // End llvm namespace
955
956#endif