Bug Summary

File: lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
Warning: line 223, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
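A minimal standalone sketch of the flagged operation (hypothetical, not taken from the report), assuming the shift amount can reach 32 as the analyzer's path below suggests:

  int main() {
    unsigned Shift = 32;            // the value the analyzer assumes countTrailingZeros can return
    unsigned Mask = ~255U << Shift; // undefined: shift count >= width of 'unsigned int'
    (void)Mask;
    return 0;
  }

Compiled with -fsanitize=shift, this should report a shift-exponent error at run time.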

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-10~svn374877/lib/Target/ARM -I /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn374877/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn374877/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn374877=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-15-233810-7101-1 -x c++ /build/llvm-toolchain-snapshot-10~svn374877/lib/Target/ARM/ARMTargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~svn374877/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARMTargetTransformInfo.h"
10#include "ARMSubtarget.h"
11#include "MCTargetDesc/ARMAddressingModes.h"
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/LoopInfo.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/ISDOpcodes.h"
17#include "llvm/CodeGen/ValueTypes.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/CallSite.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/DerivedTypes.h"
22#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/IntrinsicInst.h"
25#include "llvm/IR/Type.h"
26#include "llvm/MC/SubtargetFeature.h"
27#include "llvm/Support/Casting.h"
28#include "llvm/Support/MachineValueType.h"
29#include "llvm/Target/TargetMachine.h"
30#include <algorithm>
31#include <cassert>
32#include <cstdint>
33#include <utility>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "armtti"
38
39static cl::opt<bool> EnableMaskedLoadStores(
40 "enable-arm-maskedldst", cl::Hidden, cl::init(false),
41 cl::desc("Enable the generation of masked loads and stores"));
42
43static cl::opt<bool> DisableLowOverheadLoops(
44 "disable-arm-loloops", cl::Hidden, cl::init(false),
45 cl::desc("Disable the generation of low-overhead loops"));
46
47bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
48 const Function *Callee) const {
49 const TargetMachine &TM = getTLI()->getTargetMachine();
50 const FeatureBitset &CallerBits =
51 TM.getSubtargetImpl(*Caller)->getFeatureBits();
52 const FeatureBitset &CalleeBits =
53 TM.getSubtargetImpl(*Callee)->getFeatureBits();
54
55 // To inline a callee, all features not in the whitelist must match exactly.
56 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
57 (CalleeBits & ~InlineFeatureWhitelist);
58 // For features in the whitelist, the callee's features must be a subset of
59 // the callers'.
60 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
61 (CalleeBits & InlineFeatureWhitelist);
62 return MatchExact && MatchSubset;
63}
64
65int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
66 assert(Ty->isIntegerTy());
9. '?' condition is true
67
68 unsigned Bits = Ty->getPrimitiveSizeInBits();
69 if (Bits == 0 || Imm.getActiveBits() >= 64)
10. Assuming 'Bits' is not equal to 0
11. Assuming the condition is false
12. Taking false branch
70 return 4;
71
72 int64_t SImmVal = Imm.getSExtValue();
73 uint64_t ZImmVal = Imm.getZExtValue();
74 if (!ST->isThumb()) {
13. Taking false branch
75 if ((SImmVal >= 0 && SImmVal < 65536) ||
76 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
77 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
78 return 1;
79 return ST->hasV6T2Ops() ? 2 : 3;
80 }
81 if (ST->isThumb2()) {
14. Taking false branch
82 if ((SImmVal >= 0 && SImmVal < 65536) ||
83 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
84 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
85 return 1;
86 return ST->hasV6T2Ops() ? 2 : 3;
87 }
88 // Thumb1, any i8 imm cost 1.
89 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
15. Assuming 'Bits' is not equal to 8
16. Assuming 'SImmVal' is < 0
90 return 1;
91 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
17. Assuming the condition is false
18. Calling 'isThumbImmShiftedVal'
92 return 2;
93 // Load from constantpool.
94 return 3;
95}
96
97// Constants smaller than 256 fit in the immediate field of
98// Thumb1 instructions, so we return a cost of zero and 1 otherwise.
99int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
100 const APInt &Imm, Type *Ty) {
101 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
102 return 0;
103
104 return 1;
105}
106
107int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
108 Type *Ty) {
109 // Division by a constant can be turned into multiplication, but only if we
110 // know it's constant. So it's not so much that the immediate is cheap (it's
111 // not), but that the alternative is worse.
112 // FIXME: this is probably unneeded with GlobalISel.
113 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1. Assuming 'Opcode' is not equal to SDiv
2. Assuming 'Opcode' is not equal to UDiv
114 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
3. Assuming 'Opcode' is not equal to SRem
4. Assuming 'Opcode' is not equal to URem
115 Idx == 1)
116 return 0;
117
118 if (Opcode == Instruction::And) {
5. Assuming 'Opcode' is equal to And
6. Taking true branch
119 // UXTB/UXTH
120 if (Imm == 255 || Imm == 65535)
7. Taking false branch
121 return 0;
122 // Conversion to BIC is free, and means we can use ~Imm instead.
123 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
8. Calling 'ARMTTIImpl::getIntImmCost'
124 }
125
126 if (Opcode == Instruction::Add)
127 // Conversion to SUB is free, and means we can use -Imm instead.
128 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
129
130 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
131 Ty->getIntegerBitWidth() == 32) {
132 int64_t NegImm = -Imm.getSExtValue();
133 if (ST->isThumb2() && NegImm < 1<<12)
134 // icmp X, #-C -> cmn X, #C
135 return 0;
136 if (ST->isThumb() && NegImm < 1<<8)
137 // icmp X, #-C -> adds X, #C
138 return 0;
139 }
140
141 // xor a, -1 can always be folded to MVN
142 if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
143 return 0;
144
145 return getIntImmCost(Imm, Ty);
146}
147
148int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
149 const Instruction *I) {
150 int ISD = TLI->InstructionOpcodeToISD(Opcode);
151 assert(ISD && "Invalid opcode");
152
153 // Single to/from double precision conversions.
154 static const CostTblEntry NEONFltDblTbl[] = {
155 // Vector fptrunc/fpext conversions.
156 { ISD::FP_ROUND, MVT::v2f64, 2 },
157 { ISD::FP_EXTEND, MVT::v2f32, 2 },
158 { ISD::FP_EXTEND, MVT::v4f32, 4 }
159 };
160
161 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
162 ISD == ISD::FP_EXTEND)) {
163 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
164 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
165 return LT.first * Entry->Cost;
166 }
167
168 EVT SrcTy = TLI->getValueType(DL, Src);
169 EVT DstTy = TLI->getValueType(DL, Dst);
170
171 if (!SrcTy.isSimple() || !DstTy.isSimple())
172 return BaseT::getCastInstrCost(Opcode, Dst, Src);
173
174 // The extend of a load is free
175 if (I && isa<LoadInst>(I->getOperand(0))) {
176 static const TypeConversionCostTblEntry LoadConversionTbl[] = {
177 {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
178 {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
179 {ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},
180 {ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},
181 {ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},
182 {ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},
183 {ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},
184 {ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1},
185 {ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1},
186 {ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1},
187 {ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1},
188 {ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1},
189 };
190 if (const auto *Entry = ConvertCostTableLookup(
191 LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
192 return Entry->Cost;
193
194 static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
195 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0},
196 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0},
197 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0},
198 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0},
199 {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0},
200 {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0},
201 };
202 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
203 if (const auto *Entry =
204 ConvertCostTableLookup(MVELoadConversionTbl, ISD,
205 DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
206 return Entry->Cost;
207 }
208 }
209
210 // Some arithmetic, load and store operations have specific instructions
211 // to cast up/down their types automatically at no extra cost.
212 // TODO: Get these tables to know at least what the related operations are.
213 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
214 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
215 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
216 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
217 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
218 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
219 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
220
221 // The number of vmovl instructions for the extension.
222 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
223 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
224 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
225 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
226 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
227 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
228 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
229 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
230 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
231 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
232
233 // Operations that we legalize using splitting.
234 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
235 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
236
237 // Vector float <-> i32 conversions.
238 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
239 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
240
241 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
242 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
243 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
244 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
245 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
246 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
247 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
248 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
249 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
250 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
251 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
252 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
253 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
254 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
255 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
256 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
257 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
258 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
259 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
260 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
261
262 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
263 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
264 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
265 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
266 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
267 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
268
269 // Vector double <-> i32 conversions.
270 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
271 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
272
273 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
274 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
275 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
276 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
277 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
278 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
279
280 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
281 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
282 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
283 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
284 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
285 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
286 };
287
288 if (SrcTy.isVector() && ST->hasNEON()) {
289 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
290 DstTy.getSimpleVT(),
291 SrcTy.getSimpleVT()))
292 return Entry->Cost;
293 }
294
295 // Scalar float to integer conversions.
296 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
297 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
298 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
299 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
300 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
301 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
302 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
303 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
304 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
305 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
306 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
307 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
308 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
309 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
310 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
311 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
312 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
313 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
314 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
315 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
316 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
317 };
318 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
319 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
320 DstTy.getSimpleVT(),
321 SrcTy.getSimpleVT()))
322 return Entry->Cost;
323 }
324
325 // Scalar integer to float conversions.
326 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
327 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
328 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
329 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
330 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
331 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
332 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
333 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
334 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
335 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
336 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
337 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
338 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
339 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
340 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
341 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
342 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
343 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
344 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
345 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
346 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
347 };
348
349 if (SrcTy.isInteger() && ST->hasNEON()) {
350 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
351 ISD, DstTy.getSimpleVT(),
352 SrcTy.getSimpleVT()))
353 return Entry->Cost;
354 }
355
356 // MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one
357 // instruction, i8->i32 is two. i64 zexts are a VAND with a constant, sexts
358 // are linearised so take more.
359 static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = {
360 { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
361 { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
362 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
363 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
364 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 },
365 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 },
366 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
367 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
368 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 },
369 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
370 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 },
371 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 },
372 };
373
374 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
375 if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
376 ISD, DstTy.getSimpleVT(),
377 SrcTy.getSimpleVT()))
378 return Entry->Cost * ST->getMVEVectorCostFactor();
379 }
380
381 // Scalar integer conversion costs.
382 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
383 // i16 -> i64 requires two dependent operations.
384 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
385
386 // Truncates on i64 are assumed to be free.
387 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
388 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
389 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
390 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
391 };
392
393 if (SrcTy.isInteger()) {
394 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
395 DstTy.getSimpleVT(),
396 SrcTy.getSimpleVT()))
397 return Entry->Cost;
398 }
399
400 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
401 ? ST->getMVEVectorCostFactor()
402 : 1;
403 return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src);
404}
405
406int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
407 unsigned Index) {
408 // Penalize inserting into a D-subregister. We end up with a three times
409 // lower estimated throughput on swift.
410 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
411 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
412 return 3;
413
414 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
415 Opcode == Instruction::ExtractElement)) {
416 // Cross-class copies are expensive on many microarchitectures,
417 // so assume they are expensive by default.
418 if (ValTy->getVectorElementType()->isIntegerTy())
419 return 3;
420
421 // Even if it's not a cross class copy, this likely leads to mixing
422 // of NEON and VFP code and should be therefore penalized.
423 if (ValTy->isVectorTy() &&
424 ValTy->getScalarSizeInBits() <= 32)
425 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
426 }
427
428 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
429 Opcode == Instruction::ExtractElement)) {
430 // We say MVE moves cost at least the MVEVectorCostFactor, even though
431 // they are scalar instructions. This helps prevent mixing scalar and
432 // vector, to prevent vectorising where we end up just scalarising the
433 // result anyway.
434 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index),
435 ST->getMVEVectorCostFactor()) *
436 ValTy->getVectorNumElements() / 2;
437 }
438
439 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
440}
441
442int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
443 const Instruction *I) {
444 int ISD = TLI->InstructionOpcodeToISD(Opcode);
445 // On NEON a vector select gets lowered to vbsl.
446 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
447 // Lowering of some vector selects is currently far from perfect.
448 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
449 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
450 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
451 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
452 };
453
454 EVT SelCondTy = TLI->getValueType(DL, CondTy);
455 EVT SelValTy = TLI->getValueType(DL, ValTy);
456 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
457 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
458 SelCondTy.getSimpleVT(),
459 SelValTy.getSimpleVT()))
460 return Entry->Cost;
461 }
462
463 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
464 return LT.first;
465 }
466
467 int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy()
468 ? ST->getMVEVectorCostFactor()
469 : 1;
470 return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
471}
472
473int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
474 const SCEV *Ptr) {
475 // Address computations in vectorized code with non-consecutive addresses will
476 // likely result in more instructions compared to scalar code where the
477 // computation can more often be merged into the index mode. The resulting
478 // extra micro-ops can significantly decrease throughput.
479 unsigned NumVectorInstToHideOverhead = 10;
480 int MaxMergeDistance = 64;
481
482 if (ST->hasNEON()) {
483 if (Ty->isVectorTy() && SE &&
484 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
485 return NumVectorInstToHideOverhead;
486
487 // In many cases the address computation is not merged into the instruction
488 // addressing mode.
489 return 1;
490 }
491 return BaseT::getAddressComputationCost(Ty, SE, Ptr);
492}
493
494bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) {
495 if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
496 return false;
497
498 if (DataTy->isVectorTy()) {
499 // We don't yet support narrowing or widening masked loads/stores. Expand
500 // them for the moment.
501 unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
502 if (VecWidth != 128)
503 return false;
504 }
505
506 unsigned EltWidth = DataTy->getScalarSizeInBits();
507 return EltWidth == 32 || EltWidth == 16 || EltWidth == 8;
508}
509
510int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
511 const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
512 assert(MI && "MemcpyInst expected");
513 ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
514
515 // To model the cost of a library call, we assume 1 for the call, and
516 // 3 for the argument setup.
517 const unsigned LibCallCost = 4;
518
519 // If 'size' is not a constant, a library call will be generated.
520 if (!C)
521 return LibCallCost;
522
523 const unsigned Size = C->getValue().getZExtValue();
524 const unsigned DstAlign = MI->getDestAlignment();
525 const unsigned SrcAlign = MI->getSourceAlignment();
526 const Function *F = I->getParent()->getParent();
527 const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
528 std::vector<EVT> MemOps;
529
530 // MemOps will be populated with a list of data types that need to be
531 // loaded and stored. That's why we multiply the number of elements by 2 to
532 // get the cost for this memcpy.
533 if (getTLI()->findOptimalMemOpLowering(
534 MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
535 false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
536 MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
537 F->getAttributes()))
538 return MemOps.size() * 2;
539
540 // If we can't find an optimal memop lowering, return the default cost
541 return LibCallCost;
542}
543
544int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
545 Type *SubTp) {
546 if (ST->hasNEON()) {
547 if (Kind == TTI::SK_Broadcast) {
548 static const CostTblEntry NEONDupTbl[] = {
549 // VDUP handles these cases.
550 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
551 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
552 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
553 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
554 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
555 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
556
557 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
558 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
559 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
560 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
561
562 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
563
564 if (const auto *Entry =
565 CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
566 return LT.first * Entry->Cost;
567 }
568 if (Kind == TTI::SK_Reverse) {
569 static const CostTblEntry NEONShuffleTbl[] = {
570 // Reverse shuffle cost one instruction if we are shuffling within a
571 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
572 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
573 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
574 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
575 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
576 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
577 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
578
579 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
580 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
581 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
582 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
583
584 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
585
586 if (const auto *Entry =
587 CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
588 return LT.first * Entry->Cost;
589 }
590 if (Kind == TTI::SK_Select) {
591 static const CostTblEntry NEONSelShuffleTbl[] = {
592 // Select shuffle cost table for ARM. Cost is the number of
593 // instructions
594 // required to create the shuffled vector.
595
596 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
597 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
598 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
599 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
600
601 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
602 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
603 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
604
605 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
606
607 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
608
609 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
610 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
611 ISD::VECTOR_SHUFFLE, LT.second))
612 return LT.first * Entry->Cost;
613 }
614 }
615 if (ST->hasMVEIntegerOps()) {
616 if (Kind == TTI::SK_Broadcast) {
617 static const CostTblEntry MVEDupTbl[] = {
618 // VDUP handles these cases.
619 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
620 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
621 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},
622 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
623 {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
624
625 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
626
627 if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
628 LT.second))
629 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor();
630 }
631 }
632 int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
633 ? ST->getMVEVectorCostFactor()
634 : 1;
635 return BaseCost * BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
636}
637
638int ARMTTIImpl::getArithmeticInstrCost(
639 unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
640 TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
641 TTI::OperandValueProperties Opd2PropInfo,
642 ArrayRef<const Value *> Args) {
643 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
644 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
645
646 const unsigned FunctionCallDivCost = 20;
647 const unsigned ReciprocalDivCost = 10;
648 static const CostTblEntry CostTbl[] = {
649 // Division.
650 // These costs are somewhat random. Choose a cost of 20 to indicate that
651 // vectorizing division (added function call) is going to be very expensive.
652 // Double registers types.
653 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
654 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
655 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
656 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
657 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
658 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
659 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
660 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
661 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
662 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
663 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
664 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
665 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
666 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
667 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
668 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
669 // Quad register types.
670 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
671 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
672 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
673 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
674 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
675 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
676 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
677 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
678 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
679 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
680 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
681 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
682 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
683 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
684 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
685 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
686 // Multiplication.
687 };
688
689 if (ST->hasNEON()) {
690 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
691 return LT.first * Entry->Cost;
692
693 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
694 Opd1PropInfo, Opd2PropInfo);
695
696 // This is somewhat of a hack. The problem that we are facing is that SROA
697 // creates a sequence of shift, and, or instructions to construct values.
698 // These sequences are recognized by the ISel and have zero-cost. Not so for
699 // the vectorized code. Because we have support for v2i64 but not i64 those
700 // sequences look particularly beneficial to vectorize.
701 // To work around this we increase the cost of v2i64 operations to make them
702 // seem less beneficial.
703 if (LT.second == MVT::v2i64 &&
704 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
705 Cost += 4;
706
707 return Cost;
708 }
709
710 int BaseCost = ST->hasMVEIntegerOps() && Ty->isVectorTy()
711 ? ST->getMVEVectorCostFactor()
712 : 1;
713
714 // The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
715 // without treating floats as more expensive than scalars or increasing the
716 // costs for custom operations. The result is also multiplied by the
717 // MVEVectorCostFactor where appropriate.
718 if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
719 return LT.first * BaseCost;
720
721 // Else this is expand, assume that we need to scalarize this op.
722 if (Ty->isVectorTy()) {
723 unsigned Num = Ty->getVectorNumElements();
724 unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
725 // Return the cost of multiple scalar invocation plus the cost of
726 // inserting and extracting the values.
727 return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost;
728 }
729
730 return BaseCost;
731}
732
733int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
734 unsigned AddressSpace, const Instruction *I) {
735 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
736
737 if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
738 Src->getVectorElementType()->isDoubleTy()) {
739 // Unaligned loads/stores are extremely inefficient.
740 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
741 return LT.first * 4;
742 }
743 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
744 ? ST->getMVEVectorCostFactor()
745 : 1;
746 return BaseCost * LT.first;
747}
748
749int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
750 unsigned Factor,
751 ArrayRef<unsigned> Indices,
752 unsigned Alignment,
753 unsigned AddressSpace,
754 bool UseMaskForCond,
755 bool UseMaskForGaps) {
756 assert(Factor >= 2 && "Invalid interleave factor");
757 assert(isa<VectorType>(VecTy) && "Expect a vector type");
758
759 // vldN/vstN doesn't support vector types of i64/f64 element.
760 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
761
762 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
763 !UseMaskForCond && !UseMaskForGaps) {
764 unsigned NumElts = VecTy->getVectorNumElements();
765 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
766
767 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
768 // Accesses having vector types that are a multiple of 128 bits can be
769 // matched to more than one vldN/vstN instruction.
770 if (NumElts % Factor == 0 &&
771 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
772 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
773 }
774
775 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
776 Alignment, AddressSpace,
777 UseMaskForCond, UseMaskForGaps);
778}
779
780bool ARMTTIImpl::isLoweredToCall(const Function *F) {
781 if (!F->isIntrinsic())
782 BaseT::isLoweredToCall(F);
783
784 // Assume all Arm-specific intrinsics map to an instruction.
785 if (F->getName().startswith("llvm.arm"))
786 return false;
787
788 switch (F->getIntrinsicID()) {
789 default: break;
790 case Intrinsic::powi:
791 case Intrinsic::sin:
792 case Intrinsic::cos:
793 case Intrinsic::pow:
794 case Intrinsic::log:
795 case Intrinsic::log10:
796 case Intrinsic::log2:
797 case Intrinsic::exp:
798 case Intrinsic::exp2:
799 return true;
800 case Intrinsic::sqrt:
801 case Intrinsic::fabs:
802 case Intrinsic::copysign:
803 case Intrinsic::floor:
804 case Intrinsic::ceil:
805 case Intrinsic::trunc:
806 case Intrinsic::rint:
807 case Intrinsic::nearbyint:
808 case Intrinsic::round:
809 case Intrinsic::canonicalize:
810 case Intrinsic::lround:
811 case Intrinsic::llround:
812 case Intrinsic::lrint:
813 case Intrinsic::llrint:
814 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
815 return true;
816 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
817 return true;
818 // Some operations can be handled by vector instructions and assume
819 // unsupported vectors will be expanded into supported scalar ones.
820 // TODO Handle scalar operations properly.
821 return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
822 case Intrinsic::masked_store:
823 case Intrinsic::masked_load:
824 case Intrinsic::masked_gather:
825 case Intrinsic::masked_scatter:
826 return !ST->hasMVEIntegerOps();
827 case Intrinsic::sadd_with_overflow:
828 case Intrinsic::uadd_with_overflow:
829 case Intrinsic::ssub_with_overflow:
830 case Intrinsic::usub_with_overflow:
831 case Intrinsic::sadd_sat:
832 case Intrinsic::uadd_sat:
833 case Intrinsic::ssub_sat:
834 case Intrinsic::usub_sat:
835 return false;
836 }
837
838 return BaseT::isLoweredToCall(F);
839}
840
841bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
842 AssumptionCache &AC,
843 TargetLibraryInfo *LibInfo,
844 HardwareLoopInfo &HWLoopInfo) {
845 // Low-overhead branches are only supported in the 'low-overhead branch'
846 // extension of v8.1-m.
847 if (!ST->hasLOB() || DisableLowOverheadLoops)
848 return false;
849
850 if (!SE.hasLoopInvariantBackedgeTakenCount(L))
851 return false;
852
853 const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
854 if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
855 return false;
856
857 const SCEV *TripCountSCEV =
858 SE.getAddExpr(BackedgeTakenCount,
859 SE.getOne(BackedgeTakenCount->getType()));
860
861 // We need to store the trip count in LR, a 32-bit register.
862 if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32)
863 return false;
864
865 // Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
866 // point in generating a hardware loop if that's going to happen.
867 auto MaybeCall = [this](Instruction &I) {
868 const ARMTargetLowering *TLI = getTLI();
869 unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
870 EVT VT = TLI->getValueType(DL, I.getType(), true);
871 if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
872 return true;
873
874 // Check if an intrinsic will be lowered to a call and assume that any
875 // other CallInst will generate a bl.
876 if (auto *Call = dyn_cast<CallInst>(&I)) {
877 if (isa<IntrinsicInst>(Call)) {
878 if (const Function *F = Call->getCalledFunction())
879 return isLoweredToCall(F);
880 }
881 return true;
882 }
883
884 // FPv5 provides conversions between integer, double-precision,
885 // single-precision, and half-precision formats.
886 switch (I.getOpcode()) {
887 default:
888 break;
889 case Instruction::FPToSI:
890 case Instruction::FPToUI:
891 case Instruction::SIToFP:
892 case Instruction::UIToFP:
893 case Instruction::FPTrunc:
894 case Instruction::FPExt:
895 return !ST->hasFPARMv8Base();
896 }
897
898 // FIXME: Unfortunately the approach of checking the Operation Action does
899 // not catch all cases of Legalization that use library calls. Our
900 // Legalization step categorizes some transformations into library calls as
901 // Custom, Expand or even Legal when doing type legalization. So for now
902 // we have to special case for instance the SDIV of 64bit integers and the
903 // use of floating point emulation.
904 if (VT.isInteger() && VT.getSizeInBits() >= 64) {
905 switch (ISD) {
906 default:
907 break;
908 case ISD::SDIV:
909 case ISD::UDIV:
910 case ISD::SREM:
911 case ISD::UREM:
912 case ISD::SDIVREM:
913 case ISD::UDIVREM:
914 return true;
915 }
916 }
917
918 // Assume all other non-float operations are supported.
919 if (!VT.isFloatingPoint())
920 return false;
921
922 // We'll need a library call to handle most floats when using soft.
923 if (TLI->useSoftFloat()) {
924 switch (I.getOpcode()) {
925 default:
926 return true;
927 case Instruction::Alloca:
928 case Instruction::Load:
929 case Instruction::Store:
930 case Instruction::Select:
931 case Instruction::PHI:
932 return false;
933 }
934 }
935
936 // We'll need a libcall to perform double precision operations on a single
937 // precision only FPU.
938 if (I.getType()->isDoubleTy() && !ST->hasFP64())
939 return true;
940
941 // Likewise for half precision arithmetic.
942 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
943 return true;
944
945 return false;
946 };
947
948 auto IsHardwareLoopIntrinsic = [](Instruction &I) {
949 if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
950 switch (Call->getIntrinsicID()) {
951 default:
952 break;
953 case Intrinsic::set_loop_iterations:
954 case Intrinsic::test_set_loop_iterations:
955 case Intrinsic::loop_decrement:
956 case Intrinsic::loop_decrement_reg:
957 return true;
958 }
959 }
960 return false;
961 };
962
963 // Scan the instructions to see if there's any that we know will turn into a
964 // call or if this loop is already a low-overhead loop.
965 auto ScanLoop = [&](Loop *L) {
966 for (auto *BB : L->getBlocks()) {
967 for (auto &I : *BB) {
968 if (MaybeCall(I) || IsHardwareLoopIntrinsic(I))
969 return false;
970 }
971 }
972 return true;
973 };
974
975 // Visit inner loops.
976 for (auto Inner : *L)
977 if (!ScanLoop(Inner))
978 return false;
979
980 if (!ScanLoop(L))
981 return false;
982
983 // TODO: Check whether the trip count calculation is expensive. If L is the
984 // inner loop but we know it has a low trip count, calculating that trip
985 // count (in the parent loop) may be detrimental.
986
987 LLVMContext &C = L->getHeader()->getContext();
988 HWLoopInfo.CounterInReg = true;
989 HWLoopInfo.IsNestingLegal = false;
990 HWLoopInfo.PerformEntryTest = true;
991 HWLoopInfo.CountType = Type::getInt32Ty(C);
992 HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
993 return true;
994}
995
996void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
997 TTI::UnrollingPreferences &UP) {
998 // Only currently enable these preferences for M-Class cores.
999 if (!ST->isMClass())
1000 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
1001
1002 // Disable loop unrolling for Oz and Os.
1003 UP.OptSizeThreshold = 0;
1004 UP.PartialOptSizeThreshold = 0;
1005 if (L->getHeader()->getParent()->hasOptSize())
1006 return;
1007
1008 // Only enable on Thumb-2 targets.
1009 if (!ST->isThumb2())
1010 return;
1011
1012 SmallVector<BasicBlock*, 4> ExitingBlocks;
1013 L->getExitingBlocks(ExitingBlocks);
1014 LLVM_DEBUG(dbgs() << "Loop has:\n"
1015 << "Blocks: " << L->getNumBlocks() << "\n"
1016 << "Exit blocks: " << ExitingBlocks.size() << "\n");
1017
1018 // Only allow another exit other than the latch. This acts as an early exit
1019 // as it mirrors the profitability calculation of the runtime unroller.
1020 if (ExitingBlocks.size() > 2)
1021 return;
1022
1023 // Limit the CFG of the loop body for targets with a branch predictor.
1024 // Allowing 4 blocks permits if-then-else diamonds in the body.
1025 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
1026 return;
1027
1028 // Scan the loop: don't unroll loops with calls as this could prevent
1029 // inlining.
1030 unsigned Cost = 0;
1031 for (auto *BB : L->getBlocks()) {
1032 for (auto &I : *BB) {
1033 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1034 ImmutableCallSite CS(&I);
1035 if (const Function *F = CS.getCalledFunction()) {
1036 if (!isLoweredToCall(F))
1037 continue;
1038 }
1039 return;
1040 }
1041 // Don't unroll vectorised loops. MVE does not benefit from it as much as
1042 // scalar code.
1043 if (I.getType()->isVectorTy())
1044 return;
1045
1046 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
1047 I.value_op_end());
1048 Cost += getUserCost(&I, Operands);
1049 }
1050 }
1051
1052 LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
1053
1054 UP.Partial = true;
1055 UP.Runtime = true;
1056 UP.UpperBound = true;
1057 UP.UnrollRemainder = true;
1058 UP.DefaultUnrollRuntimeCount = 4;
1059 UP.UnrollAndJam = true;
1060 UP.UnrollAndJamInnerLoopThreshold = 60;
1061
1062 // Force-unrolling small loops can be very useful because of the
1063 // branch-taken cost of the backedge.
1064 if (Cost < 12)
1065 UP.Force = true;
1066}
1067
1068bool ARMTTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
1069 TTI::ReductionFlags Flags) const {
1070 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
1071 unsigned ScalarBits = Ty->getScalarSizeInBits();
1072 if (!ST->hasMVEIntegerOps())
1073 return false;
1074
1075 switch (Opcode) {
1076 case Instruction::FAdd:
1077 case Instruction::FMul:
1078 case Instruction::And:
1079 case Instruction::Or:
1080 case Instruction::Xor:
1081 case Instruction::Mul:
1082 case Instruction::FCmp:
1083 return false;
1084 case Instruction::ICmp:
1085 case Instruction::Add:
1086 return ScalarBits < 64 && ScalarBits * Ty->getVectorNumElements() == 128;
1087 default:
1088 llvm_unreachable("Unhandled reduction opcode");
1089 }
1090 return false;
1091}

/build/llvm-toolchain-snapshot-10~svn374877/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM addressing mode implementation stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
14#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15
16#include "llvm/ADT/APFloat.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/bit.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22
23namespace llvm {
24
25/// ARM_AM - ARM Addressing Mode Stuff
26namespace ARM_AM {
27 enum ShiftOpc {
28 no_shift = 0,
29 asr,
30 lsl,
31 lsr,
32 ror,
33 rrx,
34 uxtw
35 };
36
37 enum AddrOpc {
38 sub = 0,
39 add
40 };
41
42 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
43
44 inline const char *getShiftOpcStr(ShiftOpc Op) {
45 switch (Op) {
46 default: llvm_unreachable("Unknown shift opc!");
47 case ARM_AM::asr: return "asr";
48 case ARM_AM::lsl: return "lsl";
49 case ARM_AM::lsr: return "lsr";
50 case ARM_AM::ror: return "ror";
51 case ARM_AM::rrx: return "rrx";
52 case ARM_AM::uxtw: return "uxtw";
53 }
54 }
55
56 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
57 switch (Op) {
58 default: llvm_unreachable("Unknown shift opc!");
59 case ARM_AM::asr: return 2;
60 case ARM_AM::lsl: return 0;
61 case ARM_AM::lsr: return 1;
62 case ARM_AM::ror: return 3;
63 }
64 }
65
66 enum AMSubMode {
67 bad_am_submode = 0,
68 ia,
69 ib,
70 da,
71 db
72 };
73
74 inline const char *getAMSubModeStr(AMSubMode Mode) {
75 switch (Mode) {
76 default: llvm_unreachable("Unknown addressing sub-mode!");
77 case ARM_AM::ia: return "ia";
78 case ARM_AM::ib: return "ib";
79 case ARM_AM::da: return "da";
80 case ARM_AM::db: return "db";
81 }
82 }
83
84 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
85 ///
86 inline unsigned rotr32(unsigned Val, unsigned Amt) {
87 assert(Amt < 32 && "Invalid rotate amount");
88 return (Val >> Amt) | (Val << ((32-Amt)&31));
89 }
90
91 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
92 ///
93 inline unsigned rotl32(unsigned Val, unsigned Amt) {
94 assert(Amt < 32 && "Invalid rotate amount");
95 return (Val << Amt) | (Val >> ((32-Amt)&31));
96 }
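Note that rotr32 and rotl32 above avoid the undefined-shift problem the analyzer flags later: masking the complementary amount with & 31 means an Amt of 0 yields a shift of 0 rather than an undefined shift by 32. A tiny illustrative sketch of the idiom (hypothetical, not from the report):

  // For Amt == 0 the unmasked form would compute Val << 32 (undefined);
  // (32 - 0) & 31 == 0 keeps the shift count in [0, 31].
  inline unsigned rotr32Sketch(unsigned Val, unsigned Amt) {
    return (Val >> Amt) | (Val << ((32 - Amt) & 31));
  }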
97
98 //===--------------------------------------------------------------------===//
99 // Addressing Mode #1: shift_operand with registers
100 //===--------------------------------------------------------------------===//
101 //
102 // This 'addressing mode' is used for arithmetic instructions. It can
103 // represent things like:
104 // reg
105 // reg [asr|lsl|lsr|ror|rrx] reg
106 // reg [asr|lsl|lsr|ror|rrx] imm
107 //
108 // This is stored as three operands [rega, regb, opc]. The first is the base
109 // reg, the second is the shift amount (or reg0 if not present or imm). The
110 // third operand encodes the shift opcode and the imm if a reg isn't present.
111 //
112 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
113 return ShOp | (Imm << 3);
114 }
115 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
116 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
117
118 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
119 /// the 8-bit imm value.
120 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
121 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
122 /// the rotate amount.
123 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
124
125 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
126 /// computing the rotate amount to use. If this immediate value cannot be
127 /// handled with a single shifter-op, determine a good rotate amount that will
128 /// take a maximal chunk of bits out of the immediate.
129 inline unsigned getSOImmValRotate(unsigned Imm) {
130 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
131 // of zero.
132 if ((Imm & ~255U) == 0) return 0;
133
134 // Use CTZ to compute the rotate amount.
135 unsigned TZ = countTrailingZeros(Imm);
136
137 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
138 // not 9.
139 unsigned RotAmt = TZ & ~1;
140
141 // If we can handle this spread, return it.
142 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
143 return (32-RotAmt)&31; // HW rotates right, not left.
144
145 // For values like 0xF000000F, we should ignore the low 6 bits, then
146 // retry the hunt.
147 if (Imm & 63U) {
148 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
149 unsigned RotAmt2 = TZ2 & ~1;
150 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
151 return (32-RotAmt2)&31; // HW rotates right, not left.
152 }
153
154 // Otherwise, we have no way to cover this span of bits with a single
155 // shifter_op immediate. Return a chunk of bits that will be useful to
156 // handle.
157 return (32-RotAmt)&31; // HW rotates right, not left.
158 }
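A hand-checked usage sketch of the rotate computation (illustrative only, not part of the report):

  #include <cassert>
  void testGetSOImmValRotate() {
    // 0x200 has 9 trailing zeros, so RotAmt = 8; rotr32(0x200, 8) == 0x2 fits in
    // 8 bits, and the function returns (32 - 8) & 31 == 24 (HW rotates right).
    assert(ARM_AM::getSOImmValRotate(0x200) == 24);
    // 0xF000000F exercises the low-6-bits retry: the result is 4, since
    // 0xF000000F == rotr32(0xFF, 4).
    assert(ARM_AM::getSOImmValRotate(0xF000000FU) == 4);
  }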
159
160 /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
161 /// into a shifter_operand immediate operand, return the 12-bit encoding for
162 /// it. If not, return -1.
163 inline int getSOImmVal(unsigned Arg) {
164 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
165 // of zero.
166 if ((Arg & ~255U) == 0) return Arg;
167
168 unsigned RotAmt = getSOImmValRotate(Arg);
169
170 // If this cannot be handled with a single shifter_op, bail out.
171 if (rotr32(~255U, RotAmt) & Arg)
172 return -1;
173
174 // Encode this correctly.
175 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
176 }
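A hand-checked usage sketch (illustrative only, not part of the report):

  #include <cassert>
  void testGetSOImmVal() {
    assert(ARM_AM::getSOImmVal(0xFF) == 0xFF);    // fits directly, rotate 0
    assert(ARM_AM::getSOImmVal(0x200) == 0xC02);  // 0x02 rotated right by 24 (rot field 12)
    assert(ARM_AM::getSOImmVal(0x101) == -1);     // not expressible as one shifter_op
  }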
177
178 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
179 /// or'ing together two SOImmVal's.
180 inline bool isSOImmTwoPartVal(unsigned V) {
181 // If this can be handled with a single shifter_op, bail out.
182 V = rotr32(~255U, getSOImmValRotate(V)) & V;
183 if (V == 0)
184 return false;
185
186 // If this can be handled with two shifter_op's, accept.
187 V = rotr32(~255U, getSOImmValRotate(V)) & V;
188 return V == 0;
189 }
190
191 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
192 /// return the first chunk of it.
193 inline unsigned getSOImmTwoPartFirst(unsigned V) {
194 return rotr32(255U, getSOImmValRotate(V)) & V;
195 }
196
197 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
198 /// return the second chunk of it.
199 inline unsigned getSOImmTwoPartSecond(unsigned V) {
200 // Mask out the first hunk.
201 V = rotr32(~255U, getSOImmValRotate(V)) & V;
202
203 // Take what's left.
204 assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
205 return V;
206 }
207
208 /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
209 /// by a left shift. Returns the shift amount to use.
210 inline unsigned getThumbImmValShift(unsigned Imm) {
211 // 8-bit (or less) immediates are trivially immediate operand with a shift
212 // of zero.
213 if ((Imm & ~255U) == 0) return 0;
20. Assuming the condition is false
21. Taking false branch
214
215 // Use CTZ to compute the shift amount.
216 return countTrailingZeros(Imm);
22. Calling 'countTrailingZeros<unsigned int>'
29. Returning from 'countTrailingZeros<unsigned int>'
30. Returning the value 32
217 }
218
219 /// isThumbImmShiftedVal - Return true if the specified value can be obtained
220 /// by left shifting an 8-bit immediate.
221 inline bool isThumbImmShiftedVal(unsigned V) {
222 // If this can be handled with
223 V = (~255U << getThumbImmValShift(V)) & V;
19. Calling 'getThumbImmValShift'
31. Returning from 'getThumbImmValShift'
32. The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
224 return V == 0;
225 }
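On the path above, countTrailingZeros is only reached in getThumbImmValShift when Imm has a set bit above bit 7, so its result is at most 31 in practice; the analyzer does not model that contract and assumes the zero-input result of 32 is possible. A defensive guard would nonetheless silence the report, e.g. (a sketch, not the code actually in the tree):

  inline bool isThumbImmShiftedValGuarded(unsigned V) {
    unsigned Shift = getThumbImmValShift(V);
    if (Shift >= 32)   // only possible if countTrailingZeros saw a zero input
      return true;     // a fully shifted-out mask would clear V
    return ((~255U << Shift) & V) == 0;
  }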
226
227 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
228 /// by a left shift. Returns the shift amount to use.
229 inline unsigned getThumbImm16ValShift(unsigned Imm) {
230 // 16-bit (or less) immediates are trivially immediate operand with a shift
231 // of zero.
232 if ((Imm & ~65535U) == 0) return 0;
233
234 // Use CTZ to compute the shift amount.
235 return countTrailingZeros(Imm);
236 }
237
238 /// isThumbImm16ShiftedVal - Return true if the specified value can be
239 /// obtained by left shifting a 16-bit immediate.
240 inline bool isThumbImm16ShiftedVal(unsigned V) {
241 // If V is a 16-bit immediate shifted left, masking off that field leaves zero.
242 V = (~65535U << getThumbImm16ValShift(V)) & V;
243 return V == 0;
244 }
245
246 /// getThumbImmNonShiftedVal - If V is a value that satisfies
247 /// isThumbImmShiftedVal, return the non-shifted value.
248 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
249 return V >> getThumbImmValShift(V);
250 }
251
252
253 /// getT2SOImmValSplatVal - Return the 12-bit encoded representation
254 /// if the specified value can be obtained by splatting the low 8 bits
255 /// into every other byte or every byte of a 32-bit value. i.e.,
256 /// 00000000 00000000 00000000 abcdefgh control = 0
257 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
258 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
259 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
260 /// Return -1 if none of the above apply.
261 /// See ARM Reference Manual A6.3.2.
262 inline int getT2SOImmValSplatVal(unsigned V) {
263 unsigned u, Vs, Imm;
264 // control = 0
265 if ((V & 0xffffff00) == 0)
266 return V;
267
268 // If the value is zeroes in the first byte, just shift those off
269 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
270 // Any passing value only has 8 bits of payload, splatted across the word
271 Imm = Vs & 0xff;
272 // Likewise, any passing values have the payload splatted into the 3rd byte
273 u = Imm | (Imm << 16);
274
275 // control = 1 or 2
276 if (Vs == u)
277 return (((Vs == V) ? 1 : 2) << 8) | Imm;
278
279 // control = 3
280 if (Vs == (u | (u << 8)))
281 return (3 << 8) | Imm;
282
283 return -1;
284 }
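Worked values, obtained by tracing the code above with payload 0xAB (each encoding is (control << 8) | 0xAB):

  // getT2SOImmValSplatVal(0x000000AB) == 0x0AB   // control = 0
  // getT2SOImmValSplatVal(0x00AB00AB) == 0x1AB   // control = 1
  // getT2SOImmValSplatVal(0xAB00AB00) == 0x2AB   // control = 2
  // getT2SOImmValSplatVal(0xABABABAB) == 0x3AB   // control = 3
  // getT2SOImmValSplatVal(0x00AB0000) == -1      // no splat pattern applies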
285
286 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
287 /// specified value is a rotated 8-bit value. Return -1 if no rotation
288 /// encoding is possible.
289 /// See ARM Reference Manual A6.3.2.
290 inline int getT2SOImmValRotateVal(unsigned V) {
291 unsigned RotAmt = countLeadingZeros(V);
292 if (RotAmt >= 24)
293 return -1;
294
295 // If 'V' can be handled with a single shifter_op, return the value.
296 if ((rotr32(0xff000000U, RotAmt) & V) == V)
297 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
298
299 return -1;
300 }
301
302 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
303 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
304 /// encoding for it. If not, return -1.
305 /// See ARM Reference Manual A6.3.2.
306 inline int getT2SOImmVal(unsigned Arg) {
307 // If 'Arg' is an 8-bit splat, then get the encoded value.
308 int Splat = getT2SOImmValSplatVal(Arg);
309 if (Splat != -1)
310 return Splat;
311
312 // If 'Arg' can be handled with a single shifter_op return the value.
313 int Rot = getT2SOImmValRotateVal(Arg);
314 if (Rot != -1)
315 return Rot;
316
317 return -1;
318 }
319
320 inline unsigned getT2SOImmValRotate(unsigned V) {
321 if ((V & ~255U) == 0) return 0;
322 // Use CTZ to compute the rotate amount.
323 unsigned RotAmt = countTrailingZeros(V);
324 return (32 - RotAmt) & 31;
325 }
326
327 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
328 unsigned V = Imm;
329 // Passing values can be any combination of splat values and shifter
330 // values. If this can be handled with a single shifter or splat, bail
331 // out. Those should be handled directly, not with a two-part val.
332 if (getT2SOImmValSplatVal(V) != -1)
333 return false;
334 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
335 if (V == 0)
336 return false;
337
338 // If this can be handled as an immediate, accept.
339 if (getT2SOImmVal(V) != -1) return true;
340
341 // Likewise, try masking out a splat value first.
342 V = Imm;
343 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
344 V &= ~0xff00ff00U;
345 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
346 V &= ~0x00ff00ffU;
347 // If what's left can be handled as an immediate, accept.
348 if (getT2SOImmVal(V) != -1) return true;
349
350 // Otherwise, do not accept.
351 return false;
352 }
353
354 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
355 assert(isT2SOImmTwoPartVal(Imm) &&
356 "Immediate cannot be encoded as two part immediate!");
357 // Try a shifter operand as one part
358 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
359 // If the rest is encodable as an immediate, then return it.
360 if (getT2SOImmVal(V) != -1) return V;
361
362 // Try masking out a splat value first.
363 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
364 return Imm & 0xff00ff00U;
365
366 // The other splat is all that's left as an option.
367 assert(getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
368 return Imm & 0x00ff00ffU;
369 }
370
371 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
372 // Mask out the first chunk
373 Imm ^= getT2SOImmTwoPartFirst(Imm);
374 // Return what's left
375 assert(getT2SOImmVal(Imm) != -1 &&
376 "Unable to encode second part of T2 two part SO immediate");
377 return Imm;
378 }
379
380
381 //===--------------------------------------------------------------------===//
382 // Addressing Mode #2
383 //===--------------------------------------------------------------------===//
384 //
385 // This is used for most simple load/store instructions.
386 //
387 // addrmode2 := reg +/- reg shop imm
388 // addrmode2 := reg +/- imm12
389 //
390 // The first operand is always a Reg. The second operand is a reg if in
391 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
392 // in bit 12, the immediate in bits 0-11, and the shift op in bits 13-15.
393 // Bits 16-17 encode the index mode.
394 //
395 // If this addressing mode is a frame index (before prolog/epilog insertion
396 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
397 // with no shift amount for the frame offset.
398 //
399 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
400 unsigned IdxMode = 0) {
401 assert(Imm12 < (1 << 12) && "Imm too large!");
402 bool isSub = Opc == sub;
403 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16);
404 }
405 inline unsigned getAM2Offset(unsigned AM2Opc) {
406 return AM2Opc & ((1 << 12)-1);
407 }
408 inline AddrOpc getAM2Op(unsigned AM2Opc) {
409 return ((AM2Opc >> 12) & 1) ? sub : add;
410 }
411 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
412 return (ShiftOpc)((AM2Opc >> 13) & 7);
413 }
414 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
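A round-trip sketch over the AM2 packing above (field values chosen arbitrarily; sub and lsl are the AddrOpc/ShiftOpc enumerators of this namespace):

  unsigned Opc = getAM2Opc(sub, 100, lsl, 1);
  assert(getAM2Offset(Opc)   == 100);  // bits 0-11
  assert(getAM2Op(Opc)       == sub);  // bit 12
  assert(getAM2ShiftOpc(Opc) == lsl);  // bits 13-15
  assert(getAM2IdxMode(Opc)  == 1);    // bits 16-17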
415
416 //===--------------------------------------------------------------------===//
417 // Addressing Mode #3
418 //===--------------------------------------------------------------------===//
419 //
420 // This is used for sign-extending loads, and load/store-pair instructions.
421 //
422 // addrmode3 := reg +/- reg
423 // addrmode3 := reg +/- imm8
424 //
425 // The first operand is always a Reg. The second operand is a reg if in
426 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
427 // in bit 8 and the immediate in bits 0-7. Bits 9-10 encode the
428 // index mode.
429
430 /// getAM3Opc - This function encodes the addrmode3 opc field.
431 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
432 unsigned IdxMode = 0) {
433 bool isSub = Opc == sub;
434 return ((int)isSub << 8) | Offset | (IdxMode << 9);
435 }
436 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
437 inline AddrOpc getAM3Op(unsigned AM3Opc) {
438 return ((AM3Opc >> 8) & 1) ? sub : add;
439 }
440 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
441
442 //===--------------------------------------------------------------------===//
443 // Addressing Mode #4
444 //===--------------------------------------------------------------------===//
445 //
446 // This is used for load / store multiple instructions.
447 //
448 // addrmode4 := reg, <mode>
449 //
450 // The four modes are:
451 // IA - Increment after
452 // IB - Increment before
453 // DA - Decrement after
454 // DB - Decrement before
455 // For VFP instructions, only the IA and DB modes are valid.
456
457 inline AMSubMode getAM4SubMode(unsigned Mode) {
458 return (AMSubMode)(Mode & 0x7);
459 }
460
461 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
462
463 //===--------------------------------------------------------------------===//
464 // Addressing Mode #5
465 //===--------------------------------------------------------------------===//
466 //
467 // This is used for coprocessor instructions, such as FP load/stores.
468 //
469 // addrmode5 := reg +/- imm8*4
470 //
471 // The first operand is always a Reg. The second operand encodes the
472 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
473
474 /// getAM5Opc - This function encodes the addrmode5 opc field.
475 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
476 bool isSub = Opc == sub;
477 return ((int)isSub << 8) | Offset;
478 }
479 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
480 inline AddrOpc getAM5Op(unsigned AM5Opc) {
481 return ((AM5Opc >> 8) & 1) ? sub : add;
482 }
483
484 //===--------------------------------------------------------------------===//
485 // Addressing Mode #5 FP16
486 //===--------------------------------------------------------------------===//
487 //
488 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
489 //
490 // addrmode5fp16 := reg +/- imm8*2
491 //
492 // The first operand is always a Reg. The second operand encodes the
493 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
494
495 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
496 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
497 bool isSub = Opc == sub;
498 return ((int)isSub << 8) | Offset;
499 }
500 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
501 return AM5Opc & 0xFF;
502 }
503 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
504 return ((AM5Opc >> 8) & 1) ? sub : add;
505 }
506
507 //===--------------------------------------------------------------------===//
508 // Addressing Mode #6
509 //===--------------------------------------------------------------------===//
510 //
511 // This is used for NEON load / store instructions.
512 //
513 // addrmode6 := reg with optional alignment
514 //
515 // This is stored in two operands [regaddr, align]. The first is the
516 // address register. The second operand is the value of the alignment
517 // specifier in bytes or zero if no explicit alignment.
518 // Valid alignments depend on the specific instruction.
519
520 //===--------------------------------------------------------------------===//
521 // NEON/MVE Modified Immediates
522 //===--------------------------------------------------------------------===//
523 //
524 // Several NEON and MVE instructions (e.g., VMOV) take a "modified immediate"
525 // vector operand, where a small immediate encoded in the instruction
526 // specifies a full NEON vector value. These modified immediates are
527 // represented here as encoded integers. The low 8 bits hold the immediate
528 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
529 // the "Cmode" field of the instruction. The interfaces below treat the
530 // Op and Cmode values as a single 5-bit value.
531
532 inline unsigned createVMOVModImm(unsigned OpCmode, unsigned Val) {
533 return (OpCmode << 8) | Val;
534 }
535 inline unsigned getVMOVModImmOpCmode(unsigned ModImm) {
536 return (ModImm >> 8) & 0x1f;
537 }
538 inline unsigned getVMOVModImmVal(unsigned ModImm) { return ModImm & 0xff; }
539
540 /// decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the
541 /// element value and the element size in bits. (If the element size is
542 /// smaller than the vector, it is splatted into all the elements.)
543 inline uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits) {
544 unsigned OpCmode = getVMOVModImmOpCmode(ModImm);
545 unsigned Imm8 = getVMOVModImmVal(ModImm);
546 uint64_t Val = 0;
547
548 if (OpCmode == 0xe) {
549 // 8-bit vector elements
550 Val = Imm8;
551 EltBits = 8;
552 } else if ((OpCmode & 0xc) == 0x8) {
553 // 16-bit vector elements
554 unsigned ByteNum = (OpCmode & 0x6) >> 1;
555 Val = Imm8 << (8 * ByteNum);
556 EltBits = 16;
557 } else if ((OpCmode & 0x8) == 0) {
558 // 32-bit vector elements, zero with one byte set
559 unsigned ByteNum = (OpCmode & 0x6) >> 1;
560 Val = Imm8 << (8 * ByteNum);
561 EltBits = 32;
562 } else if ((OpCmode & 0xe) == 0xc) {
563 // 32-bit vector elements, one byte with low bits set
564 unsigned ByteNum = 1 + (OpCmode & 0x1);
565 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
566 EltBits = 32;
567 } else if (OpCmode == 0x1e) {
568 // 64-bit vector elements
569 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
570 if ((ModImm >> ByteNum) & 1)
571 Val |= (uint64_t)0xff << (8 * ByteNum);
572 }
573 EltBits = 64;
574 } else {
575 llvm_unreachable("Unsupported VMOV immediate");
576 }
577 return Val;
578 }
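A worked decode through the 16-bit-element branch above (OpCmode 0xa satisfies (OpCmode & 0xc) == 0x8 with ByteNum == 1):

  unsigned EltBits;
  uint64_t Val = decodeVMOVModImm(createVMOVModImm(0xa, 0x42), EltBits);
  // Val == 0x4200, EltBits == 16: the 8-bit payload lands in the high byte
  // of each 16-bit element and is splatted across the vector.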
579
580 // Generic validation for single-byte immediate (0X00, 00X0, etc).
581 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
582 assert(Size >= 1 && Size <= 4 && "Invalid size");
583 unsigned count = 0;
584 for (unsigned i = 0; i < Size; ++i) {
585 if (Value & 0xff) count++;
586 Value >>= 8;
587 }
588 return count == 1;
589 }
590
591 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
592 inline bool isNEONi16splat(unsigned Value) {
593 if (Value > 0xffff)
594 return false;
595 // i16 value with set bits only in one byte X0 or 0X.
596 return Value == 0 || isNEONBytesplat(Value, 2);
597 }
598
599 // Encode a NEON 16-bit splat immediate for instructions like VBIC/VORR.
600 inline unsigned encodeNEONi16splat(unsigned Value) {
601 assert(isNEONi16splat(Value) && "Invalid NEON splat value");
602 if (Value >= 0x100)
603 Value = (Value >> 8) | 0xa00;
604 else
605 Value |= 0x800;
606 return Value;
607 }
608
609 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
610 inline bool isNEONi32splat(unsigned Value) {
611 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
612 return Value == 0 || isNEONBytesplat(Value, 4);
613 }
614
615 /// Encode a NEON 32-bit splat immediate for instructions like VBIC/VORR.
616 inline unsigned encodeNEONi32splat(unsigned Value) {
617 assert(isNEONi32splat(Value) && "Invalid NEON splat value");
618 if (Value >= 0x100 && Value <= 0xff00)
619 Value = (Value >> 8) | 0x200;
620 else if (Value > 0xffff && Value <= 0xff0000)
621 Value = (Value >> 16) | 0x400;
622 else if (Value > 0xffffff)
623 Value = (Value >> 24) | 0x600;
624 return Value;
625 }
626
627 //===--------------------------------------------------------------------===//
628 // Floating-point Immediates
629 //
630 inline float getFPImmFloat(unsigned Imm) {
631 // We expect an 8-bit binary encoding of a floating-point number here.
632
633 uint8_t Sign = (Imm >> 7) & 0x1;
634 uint8_t Exp = (Imm >> 4) & 0x7;
635 uint8_t Mantissa = Imm & 0xf;
636
637 // 8-bit FP IEEE Float Encoding
638 // abcd efgh aBbbbbbc defgh000 00000000 00000000
639 //
640 // where B = NOT(b);
641 uint32_t I = 0;
642 I |= Sign << 31;
643 I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
644 I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
645 I |= (Exp & 0x3) << 23;
646 I |= Mantissa << 19;
647 return bit_cast<float>(I);
648 }
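Two values worth tracing through the bit assembly above; both follow from the a:NOT(b):c:d exponent rule in the comment:

  // getFPImmFloat(0x70) == 1.0f  // Exp field 0b111 -> IEEE exponent 127, I == 0x3F800000
  // getFPImmFloat(0x00) == 2.0f  // Exp field 0b000 -> IEEE exponent 128, I == 0x40000000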
649
650 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
651 /// floating-point value. If the value cannot be represented as an 8-bit
652 /// floating-point value, then return -1.
653 inline int getFP16Imm(const APInt &Imm) {
654 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
655 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
656 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
657
658 // We can handle 4 bits of mantissa.
659 // mantissa = (16+UInt(e:f:g:h))/16.
660 if (Mantissa & 0x3f)
661 return -1;
662 Mantissa >>= 6;
663
664 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
665 if (Exp < -3 || Exp > 4)
666 return -1;
667 Exp = ((Exp+3) & 0x7) ^ 4;
668
669 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
670 }
671
672 inline int getFP16Imm(const APFloat &FPImm) {
673 return getFP16Imm(FPImm.bitcastToAPInt());
674 }
675
676 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
677 /// floating-point value. If the value cannot be represented as an 8-bit
678 /// floating-point value, then return -1.
679 inline int getFP32Imm(const APInt &Imm) {
680 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
681 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
682 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
683
684 // We can handle 4 bits of mantissa.
685 // mantissa = (16+UInt(e:f:g:h))/16.
686 if (Mantissa & 0x7ffff)
687 return -1;
688 Mantissa >>= 19;
689 if ((Mantissa & 0xf) != Mantissa)
690 return -1;
691
692 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
693 if (Exp < -3 || Exp > 4)
694 return -1;
695 Exp = ((Exp+3) & 0x7) ^ 4;
696
697 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
698 }
699
700 inline int getFP32Imm(const APFloat &FPImm) {
701 return getFP32Imm(FPImm.bitcastToAPInt());
702 }
703
704 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
705 /// floating-point value. If the value cannot be represented as an 8-bit
706 /// floating-point value, then return -1.
707 inline int getFP64Imm(const APInt &Imm) {
708 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
709 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
710 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
711
712 // We can handle 4 bits of mantissa.
713 // mantissa = (16+UInt(e:f:g:h))/16.
714 if (Mantissa & 0xffffffffffffULL)
715 return -1;
716 Mantissa >>= 48;
717 if ((Mantissa & 0xf) != Mantissa)
718 return -1;
719
720 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
721 if (Exp < -3 || Exp > 4)
722 return -1;
723 Exp = ((Exp+3) & 0x7) ^ 4;
724
725 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
726 }
727
728 inline int getFP64Imm(const APFloat &FPImm) {
729 return getFP64Imm(FPImm.bitcastToAPInt());
730 }
731
732} // end namespace ARM_AM
733} // end namespace llvm
734
735#endif
736

/build/llvm-toolchain-snapshot-10~svn374877/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/SwapByteOrder.h"
18#include <algorithm>
19#include <cassert>
20#include <climits>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather than including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
23.1. 'ZB' is not equal to ZB_Undefined
24. Assuming 'Val' is equal to 0
25. Taking true branch
117 return 32;
26. Returning the value 32
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count the number of 0's from the least significant bit to the most
149/// significant bit, stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
23. Calling 'TrailingZerosCounter::count'
27. Returning from 'TrailingZerosCounter::count'
28. Returning the value 32
161}
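The default ZB_Width policy is what feeds the report above: for a 32-bit type a zero input yields 32, which is then out of range as a shift amount. A small behavior sketch:

  countTrailingZeros(0x8u);              // == 3
  countTrailingZeros(0u);                // == 32 under the default ZB_Width
  countTrailingZeros(0u, ZB_Undefined);  // unspecified; callers must exclude 0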
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count the number of 0's from the most significant bit to the least
218/// significant bit, stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
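Worked values for the four mask helpers with T = uint32_t and N = 8, each following directly from the definitions above:

  // maskTrailingOnes<uint32_t>(8)  == 0x000000FF
  // maskLeadingOnes<uint32_t>(8)   == 0xFF000000
  // maskTrailingZeros<uint32_t>(8) == 0xFFFFFF00
  // maskLeadingZeros<uint32_t>(8)  == 0x00FFFFFF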
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315// NOTE: The following support functions use the _32/_64 extensions instead of
316// type overloading so that signed and unsigned integers can be used without
317// ambiguity.
318
319/// Return the high 32 bits of a 64 bit value.
320constexpr inline uint32_t Hi_32(uint64_t Value) {
321 return static_cast<uint32_t>(Value >> 32);
322}
323
324/// Return the low 32 bits of a 64 bit value.
325constexpr inline uint32_t Lo_32(uint64_t Value) {
326 return static_cast<uint32_t>(Value);
327}
328
329/// Make a 64-bit integer from a high / low pair of 32-bit integers.
330constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
331 return ((uint64_t)High << 32) | (uint64_t)Low;
332}
333
334/// Checks if an integer fits into the given bit width.
335template <unsigned N> constexpr inline bool isInt(int64_t x) {
336 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
337}
338// Template specializations to get better code for common cases.
339template <> constexpr inline bool isInt<8>(int64_t x) {
340 return static_cast<int8_t>(x) == x;
341}
342template <> constexpr inline bool isInt<16>(int64_t x) {
343 return static_cast<int16_t>(x) == x;
344}
345template <> constexpr inline bool isInt<32>(int64_t x) {
346 return static_cast<int32_t>(x) == x;
347}
348
349/// Checks if a signed integer is an N bit number shifted left by S.
350template <unsigned N, unsigned S>
351constexpr inline bool isShiftedInt(int64_t x) {
352 static_assert(
353 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
354 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
355 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
356}
357
358/// Checks if an unsigned integer fits into the given bit width.
359///
360/// This is written as two functions rather than as simply
361///
362/// return N >= 64 || X < (UINT64_C(1) << N);
363///
364/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
365/// left too many places.
366template <unsigned N>
367constexpr inline typename std::enable_if<(N < 64), bool>::type
368isUInt(uint64_t X) {
369 static_assert(N > 0, "isUInt<0> doesn't make sense");
370 return X < (UINT64_C(1) << (N));
371}
372template <unsigned N>
373constexpr inline typename std::enable_if<N >= 64, bool>::type
374isUInt(uint64_t X) {
375 return true;
376}
377
378// Template specializations to get better code for common cases.
379template <> constexpr inline bool isUInt<8>(uint64_t x) {
380 return static_cast<uint8_t>(x) == x;
381}
382template <> constexpr inline bool isUInt<16>(uint64_t x) {
383 return static_cast<uint16_t>(x) == x;
384}
385template <> constexpr inline bool isUInt<32>(uint64_t x) {
386 return static_cast<uint32_t>(x) == x;
387}
388
389/// Checks if an unsigned integer is an N bit number shifted left by S.
390template <unsigned N, unsigned S>
391constexpr inline bool isShiftedUInt(uint64_t x) {
392 static_assert(
393 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
394 static_assert(N + S <= 64,
395 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
396 // Per the two static_asserts above, S must be strictly less than 64. So
397 // 1 << S is not undefined behavior.
398 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
399}
400
401/// Gets the maximum value for a N-bit unsigned integer.
402inline uint64_t maxUIntN(uint64_t N) {
403 assert(N > 0 && N <= 64 && "integer width out of range");
404
405 // uint64_t(1) << 64 is undefined behavior, so we can't do
406 // (uint64_t(1) << N) - 1
407 // without checking first that N != 64. But this works and doesn't have a
408 // branch.
409 return UINT64_MAX >> (64 - N);
410}
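The shift-right form handles the full range, including the edge the comment calls out:

  // maxUIntN(8)  == 255
  // maxUIntN(64) == UINT64_MAX   // 64 - N == 0 is a legal shift, unlike 1 << 64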
411
412/// Gets the minimum value for a N-bit signed integer.
413inline int64_t minIntN(int64_t N) {
414 assert(N > 0 && N <= 64 && "integer width out of range");
415
416 return -(UINT64_C(1)<<(N-1));
417}
418
419/// Gets the maximum value for a N-bit signed integer.
420inline int64_t maxIntN(int64_t N) {
421 assert(N > 0 && N <= 64 && "integer width out of range");
422
423 // This relies on two's complement wraparound when N == 64, so we convert to
424 // int64_t only at the very end to avoid UB.
425 return (UINT64_C(1) << (N - 1)) - 1;
426}
427
428/// Checks if an unsigned integer fits into the given (dynamic) bit width.
429inline bool isUIntN(unsigned N, uint64_t x) {
430 return N >= 64 || x <= maxUIntN(N);
431}
432
434/// Checks if a signed integer fits into the given (dynamic) bit width.
434inline bool isIntN(unsigned N, int64_t x) {
435 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
436}
437
438/// Return true if the argument is a non-empty sequence of ones starting at the
439/// least significant bit with the remainder zero (32 bit version).
440/// Ex. isMask_32(0x0000FFFFU) == true.
441constexpr inline bool isMask_32(uint32_t Value) {
442 return Value && ((Value + 1) & Value) == 0;
443}
444
445/// Return true if the argument is a non-empty sequence of ones starting at the
446/// least significant bit with the remainder zero (64 bit version).
447constexpr inline bool isMask_64(uint64_t Value) {
448 return Value && ((Value + 1) & Value) == 0;
449}
450
451/// Return true if the argument contains a non-empty sequence of ones with the
452/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
453constexpr inline bool isShiftedMask_32(uint32_t Value) {
454 return Value && isMask_32((Value - 1) | Value);
455}
456
457/// Return true if the argument contains a non-empty sequence of ones with the
458/// remainder zero (64 bit version.)
459constexpr inline bool isShiftedMask_64(uint64_t Value) {
460 return Value && isMask_64((Value - 1) | Value);
461}
462
463/// Return true if the argument is a power of two > 0.
464/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
465constexpr inline bool isPowerOf2_32(uint32_t Value) {
466 return Value && !(Value & (Value - 1));
467}
468
469/// Return true if the argument is a power of two > 0 (64 bit edition.)
470constexpr inline bool isPowerOf2_64(uint64_t Value) {
471 return Value && !(Value & (Value - 1));
472}
473
474/// Return a byte-swapped representation of the 16-bit argument.
475inline uint16_t ByteSwap_16(uint16_t Value) {
476 return sys::SwapByteOrder_16(Value);
477}
478
479/// Return a byte-swapped representation of the 32-bit argument.
480inline uint32_t ByteSwap_32(uint32_t Value) {
481 return sys::SwapByteOrder_32(Value);
482}
483
484/// Return a byte-swapped representation of the 64-bit argument.
485inline uint64_t ByteSwap_64(uint64_t Value) {
486 return sys::SwapByteOrder_64(Value);
487}
488
489/// Count the number of ones from the most significant bit to the first
490/// zero bit.
491///
492/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
493/// Only unsigned integral types are allowed.
494///
495/// \param ZB the behavior on an input of all ones. Only ZB_Width and
496/// ZB_Undefined are valid arguments.
497template <typename T>
498unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
499 static_assert(std::numeric_limits<T>::is_integer &&
500 !std::numeric_limits<T>::is_signed,
501 "Only unsigned integral types are allowed.");
502 return countLeadingZeros<T>(~Value, ZB);
503}
504
505/// Count the number of ones from the least significant bit to the first
506/// zero bit.
507///
508/// Ex. countTrailingOnes(0x00FF00FF) == 8.
509/// Only unsigned integral types are allowed.
510///
511/// \param ZB the behavior on an input of all ones. Only ZB_Width and
512/// ZB_Undefined are valid arguments.
513template <typename T>
514unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
515 static_assert(std::numeric_limits<T>::is_integer &&
516 !std::numeric_limits<T>::is_signed,
517 "Only unsigned integral types are allowed.");
518 return countTrailingZeros<T>(~Value, ZB);
519}
520
521namespace detail {
522template <typename T, std::size_t SizeOfT> struct PopulationCounter {
523 static unsigned count(T Value) {
524 // Generic version, forward to 32 bits.
525 static_assert(SizeOfT <= 4, "Not implemented!");
526#if defined(__GNUC__)
527 return __builtin_popcount(Value);
528#else
529 uint32_t v = Value;
530 v = v - ((v >> 1) & 0x55555555);
531 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
532 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
533#endif
534 }
535};
536
537template <typename T> struct PopulationCounter<T, 8> {
538 static unsigned count(T Value) {
539#if defined(__GNUC__)
540 return __builtin_popcountll(Value);
541#else
542 uint64_t v = Value;
543 v = v - ((v >> 1) & 0x5555555555555555ULL);
544 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
545 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
546 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
547#endif
548 }
549};
550} // namespace detail
551
552/// Count the number of set bits in a value.
553/// Ex. countPopulation(0xF000F000) = 8
554/// Returns 0 if the word is zero.
555template <typename T>
556inline unsigned countPopulation(T Value) {
557 static_assert(std::numeric_limits<T>::is_integer &&
558 !std::numeric_limits<T>::is_signed,
559 "Only unsigned integral types are allowed.");
560 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
561}
562
563/// Compile time Log2.
564/// Valid only for positive powers of two.
565template <size_t kValue> constexpr inline size_t CTLog2() {
566 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
567 "Value is not a valid power of 2");
568 return 1 + CTLog2<kValue / 2>();
569}
570
571template <> constexpr inline size_t CTLog2<1>() { return 0; }
572
573/// Return the log base 2 of the specified value.
574inline double Log2(double Value) {
575#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
576 return __builtin_log(Value) / __builtin_log(2.0);
577#else
578 return log2(Value);
579#endif
580}
581
582/// Return the floor log base 2 of the specified value, -1 if the value is zero.
583/// (32 bit edition.)
584/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
585inline unsigned Log2_32(uint32_t Value) {
586 return 31 - countLeadingZeros(Value);
587}
588
589/// Return the floor log base 2 of the specified value, -1 if the value is zero.
590/// (64 bit edition.)
591inline unsigned Log2_64(uint64_t Value) {
592 return 63 - countLeadingZeros(Value);
593}
594
595/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
596/// (32 bit edition).
597/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
598inline unsigned Log2_32_Ceil(uint32_t Value) {
599 return 32 - countLeadingZeros(Value - 1);
600}
601
602/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
603/// (64 bit edition.)
604inline unsigned Log2_64_Ceil(uint64_t Value) {
605 return 64 - countLeadingZeros(Value - 1);
606}
607
608/// Return the greatest common divisor of the values using Euclid's algorithm.
609template <typename T>
610inline T greatestCommonDivisor(T A, T B) {
611 while (B) {
612 T Tmp = B;
613 B = A % B;
614 A = Tmp;
615 }
616 return A;
617}
618
619inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
620 return greatestCommonDivisor<uint64_t>(A, B);
621}
622
623/// This function takes a 64-bit integer and returns the bit equivalent double.
624inline double BitsToDouble(uint64_t Bits) {
625 double D;
626 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
627 memcpy(&D, &Bits, sizeof(Bits));
628 return D;
629}
630
631/// This function takes a 32-bit integer and returns the bit equivalent float.
632inline float BitsToFloat(uint32_t Bits) {
633 float F;
634 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
635 memcpy(&F, &Bits, sizeof(Bits));
636 return F;
637}
638
639/// This function takes a double and returns the bit equivalent 64-bit integer.
640/// Note that copying doubles around changes the bits of NaNs on some hosts,
641/// notably x86, so this routine cannot be used if these bits are needed.
642inline uint64_t DoubleToBits(double Double) {
643 uint64_t Bits;
644 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
645 memcpy(&Bits, &Double, sizeof(Double));
646 return Bits;
647}
648
649/// This function takes a float and returns the bit equivalent 32-bit integer.
650/// Note that copying floats around changes the bits of NaNs on some hosts,
651/// notably x86, so this routine cannot be used if these bits are needed.
652inline uint32_t FloatToBits(float Float) {
653 uint32_t Bits;
654 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
655 memcpy(&Bits, &Float, sizeof(Float));
656 return Bits;
657}
658
659/// A and B are either alignments or offsets. Return the minimum alignment that
660/// may be assumed after adding the two together.
661constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
662 // The largest power of 2 that divides both A and B.
663 //
664 // Replace "-Value" by "1+~Value" in the following commented code to avoid
665 // MSVC warning C4146
666 // return (A | B) & -(A | B);
667 return (A | B) & (1 + ~(A | B));
668}
669
670/// Returns the next power of two (in 64-bits) that is strictly greater than A.
671/// Returns zero on overflow.
672inline uint64_t NextPowerOf2(uint64_t A) {
673 A |= (A >> 1);
674 A |= (A >> 2);
675 A |= (A >> 4);
676 A |= (A >> 8);
677 A |= (A >> 16);
678 A |= (A >> 32);
679 return A + 1;
680}
681
682/// Returns the power of two which is less than or equal to the given value.
683/// Essentially, it is a floor operation across the domain of powers of two.
684inline uint64_t PowerOf2Floor(uint64_t A) {
685 if (!A) return 0;
686 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
687}
688
689/// Returns the power of two which is greater than or equal to the given value.
690/// Essentially, it is a ceil operation across the domain of powers of two.
691inline uint64_t PowerOf2Ceil(uint64_t A) {
692 if (!A)
693 return 0;
694 return NextPowerOf2(A - 1);
695}
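Worked values tying the three power-of-two helpers together:

  // NextPowerOf2(5)   == 8;  NextPowerOf2(8) == 16  (strictly greater)
  // NextPowerOf2(~0ULL) == 0                        (overflow wraps to zero)
  // PowerOf2Floor(5)  == 4;  PowerOf2Ceil(5) == 8;  PowerOf2Ceil(8) == 8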
696
697/// Returns the next integer (mod 2**64) that is greater than or equal to
698/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
699///
700/// If non-zero \p Skew is specified, the return value will be a minimal
701/// integer that is greater than or equal to \p Value and equal to
702/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
703/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
704///
705/// Examples:
706/// \code
707/// alignTo(5, 8) = 8
708/// alignTo(17, 8) = 24
709/// alignTo(~0LL, 8) = 0
710/// alignTo(321, 255) = 510
711///
712/// alignTo(5, 8, 7) = 7
713/// alignTo(17, 8, 1) = 17
714/// alignTo(~0LL, 8, 3) = 3
715/// alignTo(321, 255, 42) = 552
716/// \endcode
717inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
718 assert(Align != 0u && "Align can't be 0.");
719 Skew %= Align;
720 return (Value + Align - 1 - Skew) / Align * Align + Skew;
721}
722
723/// Returns the next integer (mod 2**64) that is greater than or equal to
724/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
725template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
726 static_assert(Align != 0u, "Align must be non-zero");
727 return (Value + Align - 1) / Align * Align;
728}
729
730/// Returns the integer ceil(Numerator / Denominator).
731inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
732 return alignTo(Numerator, Denominator) / Denominator;
733}
734
735/// Returns the largest uint64_t less than or equal to \p Value that is
736/// \p Skew mod \p Align. \p Align must be non-zero.
737inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
738 assert(Align != 0u && "Align can't be 0.");
739 Skew %= Align;
740 return (Value - Skew) / Align * Align + Skew;
741}
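One skewed example alongside the plain one, tracing the formula above:

  // alignDown(17, 8)    == 16
  // alignDown(17, 8, 3) == 11   // largest value <= 17 of the form 8*N + 3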
742
743/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
744/// Requires 0 < B <= 32.
745template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
746 static_assert(B > 0, "Bit width can't be 0.");
747 static_assert(B <= 32, "Bit width out of range.");
748 return int32_t(X << (32 - B)) >> (32 - B);
749}
750
751/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
752/// Requires 0 < B <= 32.
753inline int32_t SignExtend32(uint32_t X, unsigned B) {
754 assert(B > 0 && "Bit width can't be 0.");
755 assert(B <= 32 && "Bit width out of range.");
756 return int32_t(X << (32 - B)) >> (32 - B);
757}
758
759/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
760/// Requires 0 < B <= 64.
761template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
762 static_assert(B > 0, "Bit width can't be 0.");
763 static_assert(B <= 64, "Bit width out of range.");
764 return int64_t(x << (64 - B)) >> (64 - B);
765}
766
767/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
768/// Requires 0 < B <= 64.
769inline int64_t SignExtend64(uint64_t X, unsigned B) {
770 assert(B > 0 && "Bit width can't be 0.");
771 assert(B <= 64 && "Bit width out of range.");
772 return int64_t(X << (64 - B)) >> (64 - B);
773}
774
775/// Subtract two unsigned integers, X and Y, of type T and return the absolute
776/// value of the result.
777template <typename T>
778typename std::enable_if<std::is_unsigned<T>::value, T>::type
779AbsoluteDifference(T X, T Y) {
780 return std::max(X, Y) - std::min(X, Y);
781}
782
783/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
784/// maximum representable value of T on overflow. ResultOverflowed indicates if
785/// the result is larger than the maximum representable value of type T.
786template <typename T>
787typename std::enable_if<std::is_unsigned<T>::value, T>::type
788SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
789 bool Dummy;
790 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
791 // Hacker's Delight, p. 29
792 T Z = X + Y;
793 Overflowed = (Z < X || Z < Y);
794 if (Overflowed)
795 return std::numeric_limits<T>::max();
796 else
797 return Z;
798}
799
800/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
801/// maximum representable value of T on overflow. ResultOverflowed indicates if
802/// the result is larger than the maximum representable value of type T.
803template <typename T>
804typename std::enable_if<std::is_unsigned<T>::value, T>::type
805SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
806 bool Dummy;
807 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
808
809 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
810 // because it fails for uint16_t (where multiplication can have undefined
811 // behavior due to promotion to int), and requires a division in addition
812 // to the multiplication.
813
814 Overflowed = false;
815
816 // Log2(Z) would be either Log2Z or Log2Z + 1.
817 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
818 // will necessarily be less than Log2Max as desired.
819 int Log2Z = Log2_64(X) + Log2_64(Y);
820 const T Max = std::numeric_limits<T>::max();
821 int Log2Max = Log2_64(Max);
822 if (Log2Z < Log2Max) {
823 return X * Y;
824 }
825 if (Log2Z > Log2Max) {
826 Overflowed = true;
827 return Max;
828 }
829
830 // We're going to use the top bit, and maybe overflow one
831 // bit past it. Multiply all but the bottom bit then add
832 // that on at the end.
833 T Z = (X >> 1) * Y;
834 if (Z & ~(Max >> 1)) {
835 Overflowed = true;
836 return Max;
837 }
838 Z <<= 1;
839 if (X & 1)
840 return SaturatingAdd(Z, Y, ResultOverflowed);
841
842 return Z;
843}
844
845/// Multiply two unsigned integers, X and Y, and add the unsigned integer A to
846/// the product. Clamp the result to the maximum representable value of T on
847/// overflow. ResultOverflowed indicates if the result is larger than the
848/// maximum representable value of type T.
849template <typename T>
850typename std::enable_if<std::is_unsigned<T>::value, T>::type
851SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
852 bool Dummy;
853 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
854
855 T Product = SaturatingMultiply(X, Y, &Overflowed);
856 if (Overflowed)
857 return Product;
858
859 return SaturatingAdd(A, Product, &Overflowed);
860}
861
862/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
863extern const float huge_valf;
864
865
866/// Add two signed integers, computing the two's complement truncated result,
867/// returning true if overflow occurred.
868template <typename T>
869typename std::enable_if<std::is_signed<T>::value, T>::type
870AddOverflow(T X, T Y, T &Result) {
871#if __has_builtin(__builtin_add_overflow)
872 return __builtin_add_overflow(X, Y, &Result);
873#else
874 // Perform the unsigned addition.
875 using U = typename std::make_unsigned<T>::type;
876 const U UX = static_cast<U>(X);
877 const U UY = static_cast<U>(Y);
878 const U UResult = UX + UY;
879
880 // Convert to signed.
881 Result = static_cast<T>(UResult);
882
883 // Adding two positive numbers should result in a positive number.
884 if (X > 0 && Y > 0)
885 return Result <= 0;
886 // Adding two negatives should result in a negative number.
887 if (X < 0 && Y < 0)
888 return Result >= 0;
889 return false;
890#endif
891}
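A small sketch of the wraparound case the fallback path detects (T = int8_t):

  int8_t R;
  bool Ovf = AddOverflow<int8_t>(100, 100, R);
  // Ovf is true and R == -56: 100 + 100 wraps mod 256, and the "two
  // positives summed to a non-positive" check above reports the overflow.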
892
893/// Subtract two signed integers, computing the two's complement truncated
894/// result, returning true if an overflow occurred.
895template <typename T>
896typename std::enable_if<std::is_signed<T>::value, T>::type
897SubOverflow(T X, T Y, T &Result) {
898#if __has_builtin(__builtin_sub_overflow)
899 return __builtin_sub_overflow(X, Y, &Result);
900#else
901 // Perform the unsigned subtraction.
902 using U = typename std::make_unsigned<T>::type;
903 const U UX = static_cast<U>(X);
904 const U UY = static_cast<U>(Y);
905 const U UResult = UX - UY;
906
907 // Convert to signed.
908 Result = static_cast<T>(UResult);
909
910 // Subtracting a positive number from a negative results in a negative number.
911 if (X <= 0 && Y > 0)
912 return Result >= 0;
913 // Subtracting a negative number from a positive results in a positive number.
914 if (X >= 0 && Y < 0)
915 return Result <= 0;
916 return false;
917#endif
918}
919
920
921/// Multiply two signed integers, computing the two's complement truncated
922/// result, returning true if an overflow occurred.
923template <typename T>
924typename std::enable_if<std::is_signed<T>::value, T>::type
925MulOverflow(T X, T Y, T &Result) {
926 // Perform the unsigned multiplication on absolute values.
927 using U = typename std::make_unsigned<T>::type;
928 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
929 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
930 const U UResult = UX * UY;
931
932 // Convert to signed.
933 const bool IsNegative = (X < 0) ^ (Y < 0);
934 Result = IsNegative ? (0 - UResult) : UResult;
935
936 // If any of the args was 0, result is 0 and no overflow occurs.
937 if (UX == 0 || UY == 0)
938 return false;
939
940 // UX and UY are in [1, 2^n], where n is the number of digits.
941 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
942 // positive) divided by an argument compares to the other.
943 if (IsNegative)
944 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
945 else
946 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
947}
948
949} // End llvm namespace
950
951#endif