Bug Summary

File: lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
Warning: line 221, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
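
For context on the diagnostic: in C++ a left shift is undefined when the shift count is greater than or equal to the width of the promoted left operand, so '~255U << 32' on a 32-bit 'unsigned int' has no defined result. The snippet below is a minimal, self-contained illustration of that rule and of how a countTrailingZeros-style helper can yield a shift amount of 32; the names are hypothetical and it is not code from the report.

  #include <cstdint>
  #include <cstdio>

  // Stand-in for llvm::countTrailingZeros: defined here to return 32 for a zero input.
  static unsigned ctz32(uint32_t X) {
    unsigned N = 0;
    while (N < 32 && ((X >> N) & 1u) == 0)
      ++N;
    return N;
  }

  int main() {
    uint32_t V = 0;            // a value for which the helper reports 32
    unsigned Amt = ctz32(V);
    // (~255u << Amt) would be undefined behaviour for Amt >= 32, which is
    // exactly what the checker reports for line 221; guard the amount first.
    if (Amt < 32)
      std::printf("%#x\n", (~255u << Amt) & V);
    else
      std::printf("shift amount %u is out of range for unsigned int\n", Amt);
    return 0;
  }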

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/ARM -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn362543/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn362543=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-06-05-060531-1271-1 -x c++ /build/llvm-toolchain-snapshot-9~svn362543/lib/Target/ARM/ARMTargetTransformInfo.cpp -faddrsig

/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARMTargetTransformInfo.h"
10#include "ARMSubtarget.h"
11#include "MCTargetDesc/ARMAddressingModes.h"
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/LoopInfo.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/ISDOpcodes.h"
17#include "llvm/CodeGen/ValueTypes.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/CallSite.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/DerivedTypes.h"
22#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/IntrinsicInst.h"
25#include "llvm/IR/Type.h"
26#include "llvm/MC/SubtargetFeature.h"
27#include "llvm/Support/Casting.h"
28#include "llvm/Support/MachineValueType.h"
29#include "llvm/Target/TargetMachine.h"
30#include <algorithm>
31#include <cassert>
32#include <cstdint>
33#include <utility>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "armtti"
38
39bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
40 const Function *Callee) const {
41 const TargetMachine &TM = getTLI()->getTargetMachine();
42 const FeatureBitset &CallerBits =
43 TM.getSubtargetImpl(*Caller)->getFeatureBits();
44 const FeatureBitset &CalleeBits =
45 TM.getSubtargetImpl(*Callee)->getFeatureBits();
46
47 // To inline a callee, all features not in the whitelist must match exactly.
48 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
49 (CalleeBits & ~InlineFeatureWhitelist);
50 // For features in the whitelist, the callee's features must be a subset of
51 // the callers'.
52 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
53 (CalleeBits & InlineFeatureWhitelist);
54 return MatchExact && MatchSubset;
55}
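
To make the two conditions above concrete, here is a toy illustration using plain bit masks in place of the FeatureBitset values (an illustration only, not LLVM API usage): bit 0 plays the role of a whitelisted feature and the remaining bits must match exactly.

  // Toy model of areInlineCompatible's checks; the masks are made up.
  constexpr unsigned Whitelist = 0x1; // features allowed to differ
  constexpr unsigned Caller    = 0x5; // caller has features 0 and 2
  constexpr unsigned Callee    = 0x4; // callee has feature 2 only
  // Non-whitelisted features must match exactly.
  static_assert((Caller & ~Whitelist) == (Callee & ~Whitelist), "exact match");
  // For whitelisted features, the callee's set must be a subset of the caller's.
  static_assert(((Caller & Callee) & Whitelist) == (Callee & Whitelist), "subset");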
56
57int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
58 assert(Ty->isIntegerTy());
9. '?' condition is true (the conditional inside the expanded assert)
59
60 unsigned Bits = Ty->getPrimitiveSizeInBits();
61 if (Bits == 0 || Imm.getActiveBits() >= 64)
10. Assuming 'Bits' is not equal to 0
11. Assuming the condition is false
12. Taking false branch
62 return 4;
63
64 int64_t SImmVal = Imm.getSExtValue();
65 uint64_t ZImmVal = Imm.getZExtValue();
66 if (!ST->isThumb()) {
13. Taking false branch
67 if ((SImmVal >= 0 && SImmVal < 65536) ||
68 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
69 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
70 return 1;
71 return ST->hasV6T2Ops() ? 2 : 3;
72 }
73 if (ST->isThumb2()) {
14. Taking false branch
74 if ((SImmVal >= 0 && SImmVal < 65536) ||
75 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
76 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
77 return 1;
78 return ST->hasV6T2Ops() ? 2 : 3;
79 }
80 // Thumb1, any i8 imm cost 1.
81 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
15. Assuming 'Bits' is not equal to 8
16. Assuming 'SImmVal' is < 0
82 return 1;
83 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
17. Assuming the condition is false
18. Calling 'isThumbImmShiftedVal'
84 return 2;
85 // Load from constantpool.
86 return 3;
87}
88
89// Constants smaller than 256 fit in the immediate field of
90// Thumb1 instructions so we return a zero cost and 1 otherwise.
91int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
92 const APInt &Imm, Type *Ty) {
93 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
94 return 0;
95
96 return 1;
97}
98
99int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
100 Type *Ty) {
101 // Division by a constant can be turned into multiplication, but only if we
102 // know it's constant. So it's not so much that the immediate is cheap (it's
103 // not), but that the alternative is worse.
104 // FIXME: this is probably unneeded with GlobalISel.
105 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1. Assuming 'Opcode' is not equal to SDiv
2. Assuming 'Opcode' is not equal to UDiv
106 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
3. Assuming 'Opcode' is not equal to SRem
4. Assuming 'Opcode' is not equal to URem
107 Idx == 1)
108 return 0;
109
110 if (Opcode == Instruction::And) {
5. Assuming 'Opcode' is equal to And
6. Taking true branch
111 // UXTB/UXTH
112 if (Imm == 255 || Imm == 65535)
7. Taking false branch
113 return 0;
114 // Conversion to BIC is free, and means we can use ~Imm instead.
115 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
8. Calling 'ARMTTIImpl::getIntImmCost'
116 }
117
118 if (Opcode == Instruction::Add)
119 // Conversion to SUB is free, and means we can use -Imm instead.
120 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
121
122 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
123 Ty->getIntegerBitWidth() == 32) {
124 int64_t NegImm = -Imm.getSExtValue();
125 if (ST->isThumb2() && NegImm < 1<<12)
126 // icmp X, #-C -> cmn X, #C
127 return 0;
128 if (ST->isThumb() && NegImm < 1<<8)
129 // icmp X, #-C -> adds X, #C
130 return 0;
131 }
132
133 // xor a, -1 can always be folded to MVN
134 if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
135 return 0;
136
137 return getIntImmCost(Imm, Ty);
138}
139
140int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
141 const Instruction *I) {
142 int ISD = TLI->InstructionOpcodeToISD(Opcode);
143 assert(ISD && "Invalid opcode");
144
145 // Single to/from double precision conversions.
146 static const CostTblEntry NEONFltDblTbl[] = {
147 // Vector fptrunc/fpext conversions.
148 { ISD::FP_ROUND, MVT::v2f64, 2 },
149 { ISD::FP_EXTEND, MVT::v2f32, 2 },
150 { ISD::FP_EXTEND, MVT::v4f32, 4 }
151 };
152
153 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
154 ISD == ISD::FP_EXTEND)) {
155 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
156 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
157 return LT.first * Entry->Cost;
158 }
159
160 EVT SrcTy = TLI->getValueType(DL, Src);
161 EVT DstTy = TLI->getValueType(DL, Dst);
162
163 if (!SrcTy.isSimple() || !DstTy.isSimple())
164 return BaseT::getCastInstrCost(Opcode, Dst, Src);
165
166 // Some arithmetic, load and store operations have specific instructions
167 // to cast up/down their types automatically at no extra cost.
168 // TODO: Get these tables to know at least what the related operations are.
169 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
170 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
171 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
172 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
173 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
174 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
175 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
176
177 // The number of vmovl instructions for the extension.
178 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
179 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
180 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
181 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
182 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
183 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
184 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
185 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
186 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
187 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
188
189 // Operations that we legalize using splitting.
190 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
191 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
192
193 // Vector float <-> i32 conversions.
194 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
195 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
196
197 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
198 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
199 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
200 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
201 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
202 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
203 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
204 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
205 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
206 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
207 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
208 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
209 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
210 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
211 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
212 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
213 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
214 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
215 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
216 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
217
218 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
219 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
220 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
221 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
222 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
223 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
224
225 // Vector double <-> i32 conversions.
226 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
227 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
228
229 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
230 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
231 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
232 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
233 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
234 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
235
236 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
237 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
238 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
239 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
240 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
241 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
242 };
243
244 if (SrcTy.isVector() && ST->hasNEON()) {
245 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
246 DstTy.getSimpleVT(),
247 SrcTy.getSimpleVT()))
248 return Entry->Cost;
249 }
250
251 // Scalar float to integer conversions.
252 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
253 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
254 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
255 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
256 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
257 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
258 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
259 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
260 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
261 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
262 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
263 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
264 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
265 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
266 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
267 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
268 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
269 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
270 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
271 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
272 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
273 };
274 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
275 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
276 DstTy.getSimpleVT(),
277 SrcTy.getSimpleVT()))
278 return Entry->Cost;
279 }
280
281 // Scalar integer to float conversions.
282 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
283 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
284 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
285 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
286 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
287 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
288 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
289 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
290 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
291 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
292 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
293 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
294 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
295 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
296 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
297 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
298 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
299 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
300 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
301 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
302 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
303 };
304
305 if (SrcTy.isInteger() && ST->hasNEON()) {
306 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
307 ISD, DstTy.getSimpleVT(),
308 SrcTy.getSimpleVT()))
309 return Entry->Cost;
310 }
311
312 // Scalar integer conversion costs.
313 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
314 // i16 -> i64 requires two dependent operations.
315 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
316
317 // Truncates on i64 are assumed to be free.
318 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
319 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
320 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
321 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
322 };
323
324 if (SrcTy.isInteger()) {
325 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
326 DstTy.getSimpleVT(),
327 SrcTy.getSimpleVT()))
328 return Entry->Cost;
329 }
330
331 return BaseT::getCastInstrCost(Opcode, Dst, Src);
332}
333
334int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
335 unsigned Index) {
336 // Penalize inserting into a D-subregister. We end up with a three times
337 // lower estimated throughput on swift.
338 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
339 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
340 return 3;
341
342 if ((Opcode == Instruction::InsertElement ||
343 Opcode == Instruction::ExtractElement)) {
344 // Cross-class copies are expensive on many microarchitectures,
345 // so assume they are expensive by default.
346 if (ValTy->getVectorElementType()->isIntegerTy())
347 return 3;
348
349 // Even if it's not a cross class copy, this likely leads to mixing
350 // of NEON and VFP code and should be therefore penalized.
351 if (ValTy->isVectorTy() &&
352 ValTy->getScalarSizeInBits() <= 32)
353 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
354 }
355
356 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
357}
358
359int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
360 const Instruction *I) {
361 int ISD = TLI->InstructionOpcodeToISD(Opcode);
362 // On NEON a vector select gets lowered to vbsl.
363 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
364 // Lowering of some vector selects is currently far from perfect.
365 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
366 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
367 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
368 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
369 };
370
371 EVT SelCondTy = TLI->getValueType(DL, CondTy);
372 EVT SelValTy = TLI->getValueType(DL, ValTy);
373 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
374 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
375 SelCondTy.getSimpleVT(),
376 SelValTy.getSimpleVT()))
377 return Entry->Cost;
378 }
379
380 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
381 return LT.first;
382 }
383
384 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
385}
386
387int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
388 const SCEV *Ptr) {
389 // Address computations in vectorized code with non-consecutive addresses will
390 // likely result in more instructions compared to scalar code where the
391 // computation can more often be merged into the index mode. The resulting
392 // extra micro-ops can significantly decrease throughput.
393 unsigned NumVectorInstToHideOverhead = 10;
394 int MaxMergeDistance = 64;
395
396 if (Ty->isVectorTy() && SE &&
397 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
398 return NumVectorInstToHideOverhead;
399
400 // In many cases the address computation is not merged into the instruction
401 // addressing mode.
402 return 1;
403}
404
405int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
406 const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
407 assert(MI && "MemcpyInst expected");
408 ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
409
410 // To model the cost of a library call, we assume 1 for the call, and
411 // 3 for the argument setup.
412 const unsigned LibCallCost = 4;
413
414 // If 'size' is not a constant, a library call will be generated.
415 if (!C)
416 return LibCallCost;
417
418 const unsigned Size = C->getValue().getZExtValue();
419 const unsigned DstAlign = MI->getDestAlignment();
420 const unsigned SrcAlign = MI->getSourceAlignment();
421 const Function *F = I->getParent()->getParent();
422 const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
423 std::vector<EVT> MemOps;
424
425 // MemOps will be populated with a list of data types that need to be
426 // loaded and stored. That's why we multiply the number of elements by 2 to
427 // get the cost for this memcpy.
428 if (getTLI()->findOptimalMemOpLowering(
429 MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
430 false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
431 MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
432 F->getAttributes()))
433 return MemOps.size() * 2;
434
435 // If we can't find an optimal memop lowering, return the default cost
436 return LibCallCost;
437}
438
439int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
440 Type *SubTp) {
441 if (Kind == TTI::SK_Broadcast) {
442 static const CostTblEntry NEONDupTbl[] = {
443 // VDUP handles these cases.
444 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
445 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
446 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
447 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
448 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
449 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
450
451 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
452 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
453 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
454 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
455
456 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
457
458 if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
459 LT.second))
460 return LT.first * Entry->Cost;
461
462 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
463 }
464 if (Kind == TTI::SK_Reverse) {
465 static const CostTblEntry NEONShuffleTbl[] = {
466 // Reverse shuffle cost one instruction if we are shuffling within a
467 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
468 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
469 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
470 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
471 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
472 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
473 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
474
475 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
476 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
477 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
478 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
479
480 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
481
482 if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
483 LT.second))
484 return LT.first * Entry->Cost;
485
486 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
487 }
488 if (Kind == TTI::SK_Select) {
489 static const CostTblEntry NEONSelShuffleTbl[] = {
490 // Select shuffle cost table for ARM. Cost is the number of instructions
491 // required to create the shuffled vector.
492
493 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
494 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
495 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
496 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
497
498 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
499 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
500 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
501
502 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
503
504 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
505
506 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
507 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
508 ISD::VECTOR_SHUFFLE, LT.second))
509 return LT.first * Entry->Cost;
510 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
511 }
512 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
513}
514
515int ARMTTIImpl::getArithmeticInstrCost(
516 unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
517 TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
518 TTI::OperandValueProperties Opd2PropInfo,
519 ArrayRef<const Value *> Args) {
520 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
521 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
522
523 const unsigned FunctionCallDivCost = 20;
524 const unsigned ReciprocalDivCost = 10;
525 static const CostTblEntry CostTbl[] = {
526 // Division.
527 // These costs are somewhat random. Choose a cost of 20 to indicate that
528 // vectorizing devision (added function call) is going to be very expensive.
529 // Double registers types.
530 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
531 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
532 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
533 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
534 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
535 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
536 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
537 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
538 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
539 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
540 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
541 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
542 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
543 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
544 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
545 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
546 // Quad register types.
547 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
548 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
549 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
550 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
551 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
552 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
553 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
554 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
555 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
556 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
557 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
558 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
559 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
560 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
561 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
562 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
563 // Multiplication.
564 };
565
566 if (ST->hasNEON())
567 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
568 return LT.first * Entry->Cost;
569
570 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
571 Opd1PropInfo, Opd2PropInfo);
572
573 // This is somewhat of a hack. The problem that we are facing is that SROA
574 // creates a sequence of shift, and, or instructions to construct values.
575 // These sequences are recognized by the ISel and have zero-cost. Not so for
576 // the vectorized code. Because we have support for v2i64 but not i64 those
577 // sequences look particularly beneficial to vectorize.
578 // To work around this we increase the cost of v2i64 operations to make them
579 // seem less beneficial.
580 if (LT.second == MVT::v2i64 &&
581 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
582 Cost += 4;
583
584 return Cost;
585}
586
587int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
588 unsigned AddressSpace, const Instruction *I) {
589 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
590
591 if (Src->isVectorTy() && Alignment != 16 &&
592 Src->getVectorElementType()->isDoubleTy()) {
593 // Unaligned loads/stores are extremely inefficient.
594 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
595 return LT.first * 4;
596 }
597 return LT.first;
598}
599
600int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
601 unsigned Factor,
602 ArrayRef<unsigned> Indices,
603 unsigned Alignment,
604 unsigned AddressSpace,
605 bool UseMaskForCond,
606 bool UseMaskForGaps) {
607 assert(Factor >= 2 && "Invalid interleave factor");
608 assert(isa<VectorType>(VecTy) && "Expect a vector type");
609
610 // vldN/vstN doesn't support vector types of i64/f64 element.
611 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
612
613 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
614 !UseMaskForCond && !UseMaskForGaps) {
615 unsigned NumElts = VecTy->getVectorNumElements();
616 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
617
618 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
619 // Accesses having vector types that are a multiple of 128 bits can be
620 // matched to more than one vldN/vstN instruction.
621 if (NumElts % Factor == 0 &&
622 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
623 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
624 }
625
626 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
627 Alignment, AddressSpace,
628 UseMaskForCond, UseMaskForGaps);
629}
630
631void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
632 TTI::UnrollingPreferences &UP) {
633 // Only currently enable these preferences for M-Class cores.
634 if (!ST->isMClass())
635 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
636
637 // Disable loop unrolling for Oz and Os.
638 UP.OptSizeThreshold = 0;
639 UP.PartialOptSizeThreshold = 0;
640 if (L->getHeader()->getParent()->hasOptSize())
641 return;
642
643 // Only enable on Thumb-2 targets.
644 if (!ST->isThumb2())
645 return;
646
647 SmallVector<BasicBlock*, 4> ExitingBlocks;
648 L->getExitingBlocks(ExitingBlocks);
649 LLVM_DEBUG(dbgs() << "Loop has:\n"
650 << "Blocks: " << L->getNumBlocks() << "\n"
651 << "Exit blocks: " << ExitingBlocks.size() << "\n");
652
653 // Only allow another exit other than the latch. This acts as an early exit
654 // as it mirrors the profitability calculation of the runtime unroller.
655 if (ExitingBlocks.size() > 2)
656 return;
657
658 // Limit the CFG of the loop body for targets with a branch predictor.
659 // Allowing 4 blocks permits if-then-else diamonds in the body.
660 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
661 return;
662
663 // Scan the loop: don't unroll loops with calls as this could prevent
664 // inlining.
665 unsigned Cost = 0;
666 for (auto *BB : L->getBlocks()) {
667 for (auto &I : *BB) {
668 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
669 ImmutableCallSite CS(&I);
670 if (const Function *F = CS.getCalledFunction()) {
671 if (!isLoweredToCall(F))
672 continue;
673 }
674 return;
675 }
676 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
677 I.value_op_end());
678 Cost += getUserCost(&I, Operands);
679 }
680 }
681
682 LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
683
684 UP.Partial = true;
685 UP.Runtime = true;
686 UP.UnrollRemainder = true;
687 UP.DefaultUnrollRuntimeCount = 4;
688 UP.UnrollAndJam = true;
689 UP.UnrollAndJamInnerLoopThreshold = 60;
690
691 // Forcing the unrolling of small loops can be very useful because of the
692 // branch-taken cost of the backedge.
693 if (Cost < 12)
694 UP.Force = true;
695}

/build/llvm-toolchain-snapshot-9~svn362543/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM addressing mode implementation stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
14#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15
16#include "llvm/ADT/APFloat.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/bit.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22
23namespace llvm {
24
25/// ARM_AM - ARM Addressing Mode Stuff
26namespace ARM_AM {
27 enum ShiftOpc {
28 no_shift = 0,
29 asr,
30 lsl,
31 lsr,
32 ror,
33 rrx
34 };
35
36 enum AddrOpc {
37 sub = 0,
38 add
39 };
40
41 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
42
43 inline const char *getShiftOpcStr(ShiftOpc Op) {
44 switch (Op) {
45 default: llvm_unreachable("Unknown shift opc!");
46 case ARM_AM::asr: return "asr";
47 case ARM_AM::lsl: return "lsl";
48 case ARM_AM::lsr: return "lsr";
49 case ARM_AM::ror: return "ror";
50 case ARM_AM::rrx: return "rrx";
51 }
52 }
53
54 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
55 switch (Op) {
56 default: llvm_unreachable("Unknown shift opc!");
57 case ARM_AM::asr: return 2;
58 case ARM_AM::lsl: return 0;
59 case ARM_AM::lsr: return 1;
60 case ARM_AM::ror: return 3;
61 }
62 }
63
64 enum AMSubMode {
65 bad_am_submode = 0,
66 ia,
67 ib,
68 da,
69 db
70 };
71
72 inline const char *getAMSubModeStr(AMSubMode Mode) {
73 switch (Mode) {
74 default: llvm_unreachable("Unknown addressing sub-mode!");
75 case ARM_AM::ia: return "ia";
76 case ARM_AM::ib: return "ib";
77 case ARM_AM::da: return "da";
78 case ARM_AM::db: return "db";
79 }
80 }
81
82 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
83 ///
84 inline unsigned rotr32(unsigned Val, unsigned Amt) {
85 assert(Amt < 32 && "Invalid rotate amount");
86 return (Val >> Amt) | (Val << ((32-Amt)&31));
87 }
88
89 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
90 ///
91 inline unsigned rotl32(unsigned Val, unsigned Amt) {
92 assert(Amt < 32 && "Invalid rotate amount");
93 return (Val << Amt) | (Val >> ((32-Amt)&31));
94 }
95
96 //===--------------------------------------------------------------------===//
97 // Addressing Mode #1: shift_operand with registers
98 //===--------------------------------------------------------------------===//
99 //
100 // This 'addressing mode' is used for arithmetic instructions. It can
101 // represent things like:
102 // reg
103 // reg [asr|lsl|lsr|ror|rrx] reg
104 // reg [asr|lsl|lsr|ror|rrx] imm
105 //
106 // This is stored as three operands [rega, regb, opc]. The first is the base
107 // reg, the second is the shift amount (or reg0 if not present or imm). The
108 // third operand encodes the shift opcode and the imm if a reg isn't present.
109 //
110 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
111 return ShOp | (Imm << 3);
112 }
113 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
114 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
115
116 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
117 /// the 8-bit imm value.
118 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
119 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
120 /// the rotate amount.
121 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
122
123 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
124 /// computing the rotate amount to use. If this immediate value cannot be
125 /// handled with a single shifter-op, determine a good rotate amount that will
126 /// take a maximal chunk of bits out of the immediate.
127 inline unsigned getSOImmValRotate(unsigned Imm) {
128 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
129 // of zero.
130 if ((Imm & ~255U) == 0) return 0;
131
132 // Use CTZ to compute the rotate amount.
133 unsigned TZ = countTrailingZeros(Imm);
134
135 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
136 // not 9.
137 unsigned RotAmt = TZ & ~1;
138
139 // If we can handle this spread, return it.
140 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
141 return (32-RotAmt)&31; // HW rotates right, not left.
142
143 // For values like 0xF000000F, we should ignore the low 6 bits, then
144 // retry the hunt.
145 if (Imm & 63U) {
146 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
147 unsigned RotAmt2 = TZ2 & ~1;
148 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
149 return (32-RotAmt2)&31; // HW rotates right, not left.
150 }
151
152 // Otherwise, we have no way to cover this span of bits with a single
153 // shifter_op immediate. Return a chunk of bits that will be useful to
154 // handle.
155 return (32-RotAmt)&31; // HW rotates right, not left.
156 }
157
158 /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
159 /// into a shifter_operand immediate operand, return the 12-bit encoding for
160 /// it. If not, return -1.
161 inline int getSOImmVal(unsigned Arg) {
162 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
163 // of zero.
164 if ((Arg & ~255U) == 0) return Arg;
165
166 unsigned RotAmt = getSOImmValRotate(Arg);
167
168 // If this cannot be handled with a single shifter_op, bail out.
169 if (rotr32(~255U, RotAmt) & Arg)
170 return -1;
171
172 // Encode this correctly.
173 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
174 }
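
A worked illustration of this encoding (a hypothetical value, not part of the original header): 0x3F0 is 0x3F rotated right by 28 bits, so it encodes as imm8 = 0x3F with a rotate field of 28/2 = 14, i.e. getSOImmVal(0x3F0) == 0xE3F.

  // Illustration only (not part of the original header).
  inline void exampleSOImmEncoding() {
    int Enc = getSOImmVal(0x3F0);
    assert(Enc == 0xE3F);
    assert(getSOImmValImm(Enc) == 0x3FU);   // low 8 bits: the byte value
    assert(getSOImmValRot(Enc) == 28U);     // (Enc >> 8) * 2: the rotate amount
    assert(rotr32(getSOImmValImm(Enc), getSOImmValRot(Enc)) == 0x3F0U);
  }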
175
176 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
177 /// or'ing together two SOImmVal's.
178 inline bool isSOImmTwoPartVal(unsigned V) {
179 // If this can be handled with a single shifter_op, bail out.
180 V = rotr32(~255U, getSOImmValRotate(V)) & V;
181 if (V == 0)
182 return false;
183
184 // If this can be handled with two shifter_op's, accept.
185 V = rotr32(~255U, getSOImmValRotate(V)) & V;
186 return V == 0;
187 }
188
189 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
190 /// return the first chunk of it.
191 inline unsigned getSOImmTwoPartFirst(unsigned V) {
192 return rotr32(255U, getSOImmValRotate(V)) & V;
193 }
194
195 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
196 /// return the second chunk of it.
197 inline unsigned getSOImmTwoPartSecond(unsigned V) {
198 // Mask out the first hunk.
199 V = rotr32(~255U, getSOImmValRotate(V)) & V;
200
201 // Take what's left.
202 assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
203 return V;
204 }
205
206 /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
207 /// by a left shift. Returns the shift amount to use.
208 inline unsigned getThumbImmValShift(unsigned Imm) {
209 // 8-bit (or less) immediates are trivially immediate operand with a shift
210 // of zero.
211 if ((Imm & ~255U) == 0) return 0;
20. Assuming the condition is false
21. Taking false branch
212
213 // Use CTZ to compute the shift amount.
214 return countTrailingZeros(Imm);
22. Calling 'countTrailingZeros<unsigned int>'
29. Returning from 'countTrailingZeros<unsigned int>'
30. Returning the value 32
215 }
216
217 /// isThumbImmShiftedVal - Return true if the specified value can be obtained
218 /// by left shifting an 8-bit immediate.
219 inline bool isThumbImmShiftedVal(unsigned V) {
220 // If this can be handled with
221 V = (~255U << getThumbImmValShift(V)) & V;
19. Calling 'getThumbImmValShift'
31. Returning from 'getThumbImmValShift'
32. The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
222 return V == 0;
223 }
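
One way to keep this check well defined when the shift amount cannot be shown to be smaller than 32 is to bound it explicitly. The following is an illustrative sketch only, not the change made in the LLVM tree.

  // Sketch: same result, with the shift amount bounded before shifting.
  inline bool isThumbImmShiftedValGuarded(unsigned V) {
    unsigned Amt = getThumbImmValShift(V); // the analyzer cannot prove Amt < 32
    if (Amt >= 32)
      return V == 0; // the mask would be shifted out entirely; nothing is cleared
    return ((~255U << Amt) & V) == 0;      // well defined: Amt < 32
  }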
224
225 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
226 /// by a left shift. Returns the shift amount to use.
227 inline unsigned getThumbImm16ValShift(unsigned Imm) {
228 // 16-bit (or less) immediates are trivially immediate operand with a shift
229 // of zero.
230 if ((Imm & ~65535U) == 0) return 0;
231
232 // Use CTZ to compute the shift amount.
233 return countTrailingZeros(Imm);
234 }
235
236 /// isThumbImm16ShiftedVal - Return true if the specified value can be
237 /// obtained by left shifting a 16-bit immediate.
238 inline bool isThumbImm16ShiftedVal(unsigned V) {
239 // If this can be handled with
240 V = (~65535U << getThumbImm16ValShift(V)) & V;
241 return V == 0;
242 }
243
244 /// getThumbImmNonShiftedVal - If V is a value that satisfies
245 /// isThumbImmShiftedVal, return the non-shifted value.
246 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
247 return V >> getThumbImmValShift(V);
248 }
249
250
251 /// getT2SOImmValSplat - Return the 12-bit encoded representation
252 /// if the specified value can be obtained by splatting the low 8 bits
253 /// into every other byte or every byte of a 32-bit value. i.e.,
254 /// 00000000 00000000 00000000 abcdefgh control = 0
255 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
256 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
257 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
258 /// Return -1 if none of the above apply.
259 /// See ARM Reference Manual A6.3.2.
260 inline int getT2SOImmValSplatVal(unsigned V) {
261 unsigned u, Vs, Imm;
262 // control = 0
263 if ((V & 0xffffff00) == 0)
264 return V;
265
266 // If the value is zeroes in the first byte, just shift those off
267 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
268 // Any passing value only has 8 bits of payload, splatted across the word
269 Imm = Vs & 0xff;
270 // Likewise, any passing values have the payload splatted into the 3rd byte
271 u = Imm | (Imm << 16);
272
273 // control = 1 or 2
274 if (Vs == u)
275 return (((Vs == V) ? 1 : 2) << 8) | Imm;
276
277 // control = 3
278 if (Vs == (u | (u << 8)))
279 return (3 << 8) | Imm;
280
281 return -1;
282 }
283
284 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
285 /// specified value is a rotated 8-bit value. Return -1 if no rotation
286 /// encoding is possible.
287 /// See ARM Reference Manual A6.3.2.
288 inline int getT2SOImmValRotateVal(unsigned V) {
289 unsigned RotAmt = countLeadingZeros(V);
290 if (RotAmt >= 24)
291 return -1;
292
293 // If 'Arg' can be handled with a single shifter_op return the value.
294 if ((rotr32(0xff000000U, RotAmt) & V) == V)
295 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
296
297 return -1;
298 }
299
300 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
301 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
302 /// encoding for it. If not, return -1.
303 /// See ARM Reference Manual A6.3.2.
304 inline int getT2SOImmVal(unsigned Arg) {
305 // If 'Arg' is an 8-bit splat, then get the encoded value.
306 int Splat = getT2SOImmValSplatVal(Arg);
307 if (Splat != -1)
308 return Splat;
309
310 // If 'Arg' can be handled with a single shifter_op return the value.
311 int Rot = getT2SOImmValRotateVal(Arg);
312 if (Rot != -1)
313 return Rot;
314
315 return -1;
316 }
317
318 inline unsigned getT2SOImmValRotate(unsigned V) {
319 if ((V & ~255U) == 0) return 0;
320 // Use CTZ to compute the rotate amount.
321 unsigned RotAmt = countTrailingZeros(V);
322 return (32 - RotAmt) & 31;
323 }
324
325 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
326 unsigned V = Imm;
327 // Passing values can be any combination of splat values and shifter
328 // values. If this can be handled with a single shifter or splat, bail
329 // out. Those should be handled directly, not with a two-part val.
330 if (getT2SOImmValSplatVal(V) != -1)
331 return false;
332 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
333 if (V == 0)
334 return false;
335
336 // If this can be handled as an immediate, accept.
337 if (getT2SOImmVal(V) != -1) return true;
338
339 // Likewise, try masking out a splat value first.
340 V = Imm;
341 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
342 V &= ~0xff00ff00U;
343 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
344 V &= ~0x00ff00ffU;
345 // If what's left can be handled as an immediate, accept.
346 if (getT2SOImmVal(V) != -1) return true;
347
348 // Otherwise, do not accept.
349 return false;
350 }
351
352 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
353 assert (isT2SOImmTwoPartVal(Imm) &&
354 "Immedate cannot be encoded as two part immediate!");
355 // Try a shifter operand as one part
356 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
357 // If the rest is encodable as an immediate, then return it.
358 if (getT2SOImmVal(V) != -1) return V;
359
360 // Try masking out a splat value first.
361 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
362 return Imm & 0xff00ff00U;
363
364 // The other splat is all that's left as an option.
365 assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
366 return Imm & 0x00ff00ffU;
367 }
368
369 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
370 // Mask out the first hunk
371 Imm ^= getT2SOImmTwoPartFirst(Imm);
372 // Return what's left
373 assert (getT2SOImmVal(Imm) != -1 &&
374 "Unable to encode second part of T2 two part SO immediate");
375 return Imm;
376 }
377
378
379 //===--------------------------------------------------------------------===//
380 // Addressing Mode #2
381 //===--------------------------------------------------------------------===//
382 //
383 // This is used for most simple load/store instructions.
384 //
385 // addrmode2 := reg +/- reg shop imm
386 // addrmode2 := reg +/- imm12
387 //
388 // The first operand is always a Reg. The second operand is a reg if in
389 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
390 // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
391 // fourth operand 16-17 encodes the index mode.
392 //
393 // If this addressing mode is a frame index (before prolog/epilog insertion
394 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
395 // with no shift amount for the frame offset.
396 //
397 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
398 unsigned IdxMode = 0) {
399 assert(Imm12 < (1 << 12) && "Imm too large!");
400 bool isSub = Opc == sub;
401 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
402 }
403 inline unsigned getAM2Offset(unsigned AM2Opc) {
404 return AM2Opc & ((1 << 12)-1);
405 }
406 inline AddrOpc getAM2Op(unsigned AM2Opc) {
407 return ((AM2Opc >> 12) & 1) ? sub : add;
408 }
409 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
410 return (ShiftOpc)((AM2Opc >> 13) & 7);
411 }
412 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
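
As a worked illustration of this packing (hypothetical operand values, not part of the original header): a subtracted 12-bit offset of 100 with an lsl shift and index mode 0 round-trips through the accessors above.

  // Illustration only (not part of the original header).
  inline void exampleAM2RoundTrip() {
    // 100 | (1 << 12) | (lsl << 13) == 0x5064
    unsigned Enc = getAM2Opc(sub, 100, lsl);
    assert(getAM2Offset(Enc) == 100);    // bits 0-11: the imm12
    assert(getAM2Op(Enc) == sub);        // bit 12: add/sub
    assert(getAM2ShiftOpc(Enc) == lsl);  // bits 13-15: shift opcode
    assert(getAM2IdxMode(Enc) == 0);     // bits 16-17: index mode
  }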
413
414 //===--------------------------------------------------------------------===//
415 // Addressing Mode #3
416 //===--------------------------------------------------------------------===//
417 //
418 // This is used for sign-extending loads, and load/store-pair instructions.
419 //
420 // addrmode3 := reg +/- reg
421 // addrmode3 := reg +/- imm8
422 //
423 // The first operand is always a Reg. The second operand is a reg if in
424 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
425 // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
426 // index mode.
427
428 /// getAM3Opc - This function encodes the addrmode3 opc field.
429 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
430 unsigned IdxMode = 0) {
431 bool isSub = Opc == sub;
432 return ((int)isSub << 8) | Offset | (IdxMode << 9);
433 }
434 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
435 inline AddrOpc getAM3Op(unsigned AM3Opc) {
436 return ((AM3Opc >> 8) & 1) ? sub : add;
437 }
438 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
439
440 //===--------------------------------------------------------------------===//
441 // Addressing Mode #4
442 //===--------------------------------------------------------------------===//
443 //
444 // This is used for load / store multiple instructions.
445 //
446 // addrmode4 := reg, <mode>
447 //
448 // The four modes are:
449 // IA - Increment after
450 // IB - Increment before
451 // DA - Decrement after
452 // DB - Decrement before
453 // For VFP instructions, only the IA and DB modes are valid.
454
455 inline AMSubMode getAM4SubMode(unsigned Mode) {
456 return (AMSubMode)(Mode & 0x7);
457 }
458
459 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
460
461 //===--------------------------------------------------------------------===//
462 // Addressing Mode #5
463 //===--------------------------------------------------------------------===//
464 //
465 // This is used for coprocessor instructions, such as FP load/stores.
466 //
467 // addrmode5 := reg +/- imm8*4
468 //
469 // The first operand is always a Reg. The second operand encodes the
470 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
471
472 /// getAM5Opc - This function encodes the addrmode5 opc field.
473 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
474 bool isSub = Opc == sub;
475 return ((int)isSub << 8) | Offset;
476 }
477 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
478 inline AddrOpc getAM5Op(unsigned AM5Opc) {
479 return ((AM5Opc >> 8) & 1) ? sub : add;
480 }
481
482 //===--------------------------------------------------------------------===//
483 // Addressing Mode #5 FP16
484 //===--------------------------------------------------------------------===//
485 //
486 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
487 //
488 // addrmode5fp16 := reg +/- imm8*2
489 //
490 // The first operand is always a Reg. The second operand encodes the
491 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
492
493 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
494 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
495 bool isSub = Opc == sub;
496 return ((int)isSub << 8) | Offset;
497 }
498 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
499 return AM5Opc & 0xFF;
500 }
501 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
502 return ((AM5Opc >> 8) & 1) ? sub : add;
503 }
504
505 //===--------------------------------------------------------------------===//
506 // Addressing Mode #6
507 //===--------------------------------------------------------------------===//
508 //
509 // This is used for NEON load / store instructions.
510 //
511 // addrmode6 := reg with optional alignment
512 //
513 // This is stored in two operands [regaddr, align]. The first is the
514 // address register. The second operand is the value of the alignment
515 // specifier in bytes or zero if no explicit alignment.
516 // Valid alignments depend on the specific instruction.
517
518 //===--------------------------------------------------------------------===//
519 // NEON Modified Immediates
520 //===--------------------------------------------------------------------===//
521 //
522 // Several NEON instructions (e.g., VMOV) take a "modified immediate"
523 // vector operand, where a small immediate encoded in the instruction
524 // specifies a full NEON vector value. These modified immediates are
525 // represented here as encoded integers. The low 8 bits hold the immediate
526 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
527 // the "Cmode" field of the instruction. The interfaces below treat the
528 // Op and Cmode values as a single 5-bit value.
529
530 inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
531 return (OpCmode << 8) | Val;
532 }
533 inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
534 return (ModImm >> 8) & 0x1f;
535 }
536 inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; }
537
538 /// decodeNEONModImm - Decode a NEON modified immediate value into the
539 /// element value and the element size in bits. (If the element size is
540 /// smaller than the vector, it is splatted into all the elements.)
541 inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
542 unsigned OpCmode = getNEONModImmOpCmode(ModImm);
543 unsigned Imm8 = getNEONModImmVal(ModImm);
544 uint64_t Val = 0;
545
546 if (OpCmode == 0xe) {
547 // 8-bit vector elements
548 Val = Imm8;
549 EltBits = 8;
550 } else if ((OpCmode & 0xc) == 0x8) {
551 // 16-bit vector elements
552 unsigned ByteNum = (OpCmode & 0x6) >> 1;
553 Val = Imm8 << (8 * ByteNum);
554 EltBits = 16;
555 } else if ((OpCmode & 0x8) == 0) {
556 // 32-bit vector elements, zero with one byte set
557 unsigned ByteNum = (OpCmode & 0x6) >> 1;
558 Val = Imm8 << (8 * ByteNum);
559 EltBits = 32;
560 } else if ((OpCmode & 0xe) == 0xc) {
561 // 32-bit vector elements, one byte with low bits set
562 unsigned ByteNum = 1 + (OpCmode & 0x1);
563 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
564 EltBits = 32;
565 } else if (OpCmode == 0x1e) {
566 // 64-bit vector elements
567 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
568 if ((ModImm >> ByteNum) & 1)
569 Val |= (uint64_t)0xff << (8 * ByteNum);
570 }
571 EltBits = 64;
572 } else {
573 llvm_unreachable("Unsupported NEON immediate");
574 }
575 return Val;
576 }
577
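To make the OpCmode/Val packing concrete, here is a small sketch (illustrative values only) of a 32-bit "one byte set" immediate going through the helpers above:

// OpCmode 0x2 selects 32-bit elements with the immediate placed in byte 1.
unsigned ModImm = ARM_AM::createNEONModImm(0x2, 0xab); // (0x2 << 8) | 0xab == 0x2ab
unsigned EltBits;
uint64_t Elt = ARM_AM::decodeNEONModImm(ModImm, EltBits);
// ByteNum = (0x2 & 0x6) >> 1 == 1, so Elt == 0xab00 and EltBits == 32.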
578 // Generic validation for single-byte immediate (0X00, 00X0, etc).
579 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
580 assert(Size >= 1 && Size <= 4 && "Invalid size");
581 unsigned count = 0;
582 for (unsigned i = 0; i < Size; ++i) {
583 if (Value & 0xff) count++;
584 Value >>= 8;
585 }
586 return count == 1;
587 }
588
589 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
590 inline bool isNEONi16splat(unsigned Value) {
591 if (Value > 0xffff)
592 return false;
593 // i16 value with set bits only in one byte X0 or 0X.
594 return Value == 0 || isNEONBytesplat(Value, 2);
595 }
596
597 // Encode a NEON 16-bit splat immediate for instructions like VBIC/VORR.
598 inline unsigned encodeNEONi16splat(unsigned Value) {
599 assert(isNEONi16splat(Value) && "Invalid NEON splat value");
600 if (Value >= 0x100)
601 Value = (Value >> 8) | 0xa00;
602 else
603 Value |= 0x800;
604 return Value;
605 }
606
607 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
608 inline bool isNEONi32splat(unsigned Value) {
609 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
610 return Value == 0 || isNEONBytesplat(Value, 4);
611 }
612
613 /// Encode a NEON 32-bit splat immediate for instructions like VBIC/VORR.
614 inline unsigned encodeNEONi32splat(unsigned Value) {
615 assert(isNEONi32splat(Value) && "Invalid NEON splat value");
616 if (Value >= 0x100 && Value <= 0xff00)
617 Value = (Value >> 8) | 0x200;
618 else if (Value > 0xffff && Value <= 0xff0000)
619 Value = (Value >> 16) | 0x400;
620 else if (Value > 0xffffff)
621 Value = (Value >> 24) | 0x600;
622 return Value;
623 }
624
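A brief illustrative trace of the splat encoders above (sketch only; the results carry the cmode tag above the immediate byte):

unsigned E16 = ARM_AM::encodeNEONi16splat(0xab00);   // (0xab00 >> 8) | 0xa00 == 0xaab
unsigned E32 = ARM_AM::encodeNEONi32splat(0xab0000); // (0xab0000 >> 16) | 0x400 == 0x4ab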
625 //===--------------------------------------------------------------------===//
626 // Floating-point Immediates
627 //
628 inline float getFPImmFloat(unsigned Imm) {
629 // We expect an 8-bit binary encoding of a floating-point number here.
630
631 uint8_t Sign = (Imm >> 7) & 0x1;
632 uint8_t Exp = (Imm >> 4) & 0x7;
633 uint8_t Mantissa = Imm & 0xf;
634
635 // 8-bit FP IEEE Float Encoding
636 // abcd efgh aBbbbbbc defgh000 00000000 00000000
637 //
638 // where B = NOT(b);
639 uint32_t I = 0;
640 I |= Sign << 31;
641 I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
642 I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
643 I |= (Exp & 0x3) << 23;
644 I |= Mantissa << 19;
645 return bit_cast<float>(I);
646 }
647
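For example (worked values, not part of the header): the 8-bit encoding 0x70 has sign 0, exponent field 0b111, and mantissa 0, which the expansion above turns into the IEEE-754 bit pattern of 1.0f; setting the sign bit gives -1.0f:

float One      = ARM_AM::getFPImmFloat(0x70); // bits 0x3f800000, i.e.  1.0f
float MinusOne = ARM_AM::getFPImmFloat(0xf0); // bits 0xbf800000, i.e. -1.0f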
648 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
649 /// floating-point value. If the value cannot be represented as an 8-bit
650 /// floating-point value, then return -1.
651 inline int getFP16Imm(const APInt &Imm) {
652 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
653 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
654 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
655
656 // We can handle 4 bits of mantissa.
657 // mantissa = (16+UInt(e:f:g:h))/16.
658 if (Mantissa & 0x3f)
659 return -1;
660 Mantissa >>= 6;
661
662 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
663 if (Exp < -3 || Exp > 4)
664 return -1;
665 Exp = ((Exp+3) & 0x7) ^ 4;
666
667 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
668 }
669
670 inline int getFP16Imm(const APFloat &FPImm) {
671 return getFP16Imm(FPImm.bitcastToAPInt());
672 }
673
674 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
675 /// floating-point value. If the value cannot be represented as an 8-bit
676 /// floating-point value, then return -1.
677 inline int getFP32Imm(const APInt &Imm) {
678 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
679 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
680 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
681
682 // We can handle 4 bits of mantissa.
683 // mantissa = (16+UInt(e:f:g:h))/16.
684 if (Mantissa & 0x7ffff)
685 return -1;
686 Mantissa >>= 19;
687 if ((Mantissa & 0xf) != Mantissa)
688 return -1;
689
690 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
691 if (Exp < -3 || Exp > 4)
692 return -1;
693 Exp = ((Exp+3) & 0x7) ^ 4;
694
695 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
696 }
697
698 inline int getFP32Imm(const APFloat &FPImm) {
699 return getFP32Imm(FPImm.bitcastToAPInt());
700 }
701
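Going the other way, a small sketch of the 32-bit encoder above (illustrative values; this assumes llvm::APFloat's float constructor):

int Enc = ARM_AM::getFP32Imm(llvm::APFloat(1.0f)); // 0x70: sign 0, exp 0b111, mantissa 0
int Bad = ARM_AM::getFP32Imm(llvm::APFloat(0.1f)); // -1: 0.1f needs more than 4 mantissa bits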
702 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
703 /// floating-point value. If the value cannot be represented as an 8-bit
704 /// floating-point value, then return -1.
705 inline int getFP64Imm(const APInt &Imm) {
706 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
707 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
708 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
709
710 // We can handle 4 bits of mantissa.
711 // mantissa = (16+UInt(e:f:g:h))/16.
712 if (Mantissa & 0xffffffffffffULL)
713 return -1;
714 Mantissa >>= 48;
715 if ((Mantissa & 0xf) != Mantissa)
716 return -1;
717
718 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
719 if (Exp < -3 || Exp > 4)
720 return -1;
721 Exp = ((Exp+3) & 0x7) ^ 4;
722
723 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
724 }
725
726 inline int getFP64Imm(const APFloat &FPImm) {
727 return getFP64Imm(FPImm.bitcastToAPInt());
728 }
729
730} // end namespace ARM_AM
731} // end namespace llvm
732
733#endif
734

/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/SwapByteOrder.h"
18#include <algorithm>
19#include <cassert>
20#include <climits>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42/// The behavior an operation has on an input of 0.
43enum ZeroBehavior {
44 /// The returned value is undefined.
45 ZB_Undefined,
46 /// The returned value is numeric_limits<T>::max()
47 ZB_Max,
48 /// The returned value is numeric_limits<T>::digits
49 ZB_Width
50};
51
52namespace detail {
53template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
54 static unsigned count(T Val, ZeroBehavior) {
55 if (!Val)
56 return std::numeric_limits<T>::digits;
57 if (Val & 0x1)
58 return 0;
59
60 // Bisection method.
61 unsigned ZeroBits = 0;
62 T Shift = std::numeric_limits<T>::digits >> 1;
63 T Mask = std::numeric_limits<T>::max() >> Shift;
64 while (Shift) {
65 if ((Val & Mask) == 0) {
66 Val >>= Shift;
67 ZeroBits |= Shift;
68 }
69 Shift >>= 1;
70 Mask >>= Shift;
71 }
72 return ZeroBits;
73 }
74};
75
76#if __GNUC__ >= 4 || defined(_MSC_VER)
77template <typename T> struct TrailingZerosCounter<T, 4> {
78 static unsigned count(T Val, ZeroBehavior ZB) {
79 if (ZB != ZB_Undefined && Val == 0)
24: Assuming 'Val' is equal to 0
25: Taking true branch
80 return 32;
26: Returning the value 32
81
82#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
83 return __builtin_ctz(Val);
84#elif defined(_MSC_VER)
85 unsigned long Index;
86 _BitScanForward(&Index, Val);
87 return Index;
88#endif
89 }
90};
91
92#if !defined(_MSC_VER) || defined(_M_X64)
93template <typename T> struct TrailingZerosCounter<T, 8> {
94 static unsigned count(T Val, ZeroBehavior ZB) {
95 if (ZB != ZB_Undefined && Val == 0)
96 return 64;
97
98#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
99 return __builtin_ctzll(Val);
100#elif defined(_MSC_VER)
101 unsigned long Index;
102 _BitScanForward64(&Index, Val);
103 return Index;
104#endif
105 }
106};
107#endif
108#endif
109} // namespace detail
110
111/// Count number of 0's from the least significant bit to the most
112/// stopping at the first 1.
113///
114/// Only unsigned integral types are allowed.
115///
116/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
117/// valid arguments.
118template <typename T>
119unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
120 static_assert(std::numeric_limits<T>::is_integer &&
121 !std::numeric_limits<T>::is_signed,
122 "Only unsigned integral types are allowed.");
123 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
23: Calling 'TrailingZerosCounter::count'
27: Returning from 'TrailingZerosCounter::count'
28: Returning the value 32
124}
125
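The path notes above hinge on the zero-input behavior documented here: with the default ZB_Width, a zero input yields the full bit width (32 for a 32-bit type), and shifting a 32-bit value by that result is undefined behavior. A minimal sketch of that behavior:

unsigned TZ = llvm::countTrailingZeros(0u); // default ZB_Width: returns 32
unsigned OK = llvm::countTrailingZeros(8u); // returns 3
// 1u << TZ would shift a 32-bit value by 32, which is undefined behavior.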
126namespace detail {
127template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
128 static unsigned count(T Val, ZeroBehavior) {
129 if (!Val)
130 return std::numeric_limits<T>::digits;
131
132 // Bisection method.
133 unsigned ZeroBits = 0;
134 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
135 T Tmp = Val >> Shift;
136 if (Tmp)
137 Val = Tmp;
138 else
139 ZeroBits |= Shift;
140 }
141 return ZeroBits;
142 }
143};
144
145#if __GNUC__ >= 4 || defined(_MSC_VER)
146template <typename T> struct LeadingZerosCounter<T, 4> {
147 static unsigned count(T Val, ZeroBehavior ZB) {
148 if (ZB != ZB_Undefined && Val == 0)
149 return 32;
150
151#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
152 return __builtin_clz(Val);
153#elif defined(_MSC_VER)
154 unsigned long Index;
155 _BitScanReverse(&Index, Val);
156 return Index ^ 31;
157#endif
158 }
159};
160
161#if !defined(_MSC_VER) || defined(_M_X64)
162template <typename T> struct LeadingZerosCounter<T, 8> {
163 static unsigned count(T Val, ZeroBehavior ZB) {
164 if (ZB != ZB_Undefined && Val == 0)
165 return 64;
166
167#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
168 return __builtin_clzll(Val);
169#elif defined(_MSC_VER)
170 unsigned long Index;
171 _BitScanReverse64(&Index, Val);
172 return Index ^ 63;
173#endif
174 }
175};
176#endif
177#endif
178} // namespace detail
179
180/// Count number of 0's from the most significant bit to the least
181/// stopping at the first 1.
182///
183/// Only unsigned integral types are allowed.
184///
185/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
186/// valid arguments.
187template <typename T>
188unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
189 static_assert(std::numeric_limits<T>::is_integer &&
190 !std::numeric_limits<T>::is_signed,
191 "Only unsigned integral types are allowed.");
192 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
193}
194
195/// Get the index of the first set bit starting from the least
196/// significant bit.
197///
198/// Only unsigned integral types are allowed.
199///
200/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
201/// valid arguments.
202template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
203 if (ZB == ZB_Max && Val == 0)
204 return std::numeric_limits<T>::max();
205
206 return countTrailingZeros(Val, ZB_Undefined);
207}
208
209/// Create a bitmask with the N right-most bits set to 1, and all other
210/// bits set to 0. Only unsigned types are allowed.
211template <typename T> T maskTrailingOnes(unsigned N) {
212 static_assert(std::is_unsigned<T>::value, "Invalid type!");
213 const unsigned Bits = CHAR_BIT * sizeof(T);
214 assert(N <= Bits && "Invalid bit index");
215 return N == 0 ? 0 : (T(-1) >> (Bits - N));
216}
217
218/// Create a bitmask with the N left-most bits set to 1, and all other
219/// bits set to 0. Only unsigned types are allowed.
220template <typename T> T maskLeadingOnes(unsigned N) {
221 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
222}
223
224/// Create a bitmask with the N right-most bits set to 0, and all other
225/// bits set to 1. Only unsigned types are allowed.
226template <typename T> T maskTrailingZeros(unsigned N) {
227 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
228}
229
230/// Create a bitmask with the N left-most bits set to 0, and all other
231/// bits set to 1. Only unsigned types are allowed.
232template <typename T> T maskLeadingZeros(unsigned N) {
233 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
234}
235
236/// Get the index of the last set bit starting from the least
237/// significant bit.
238///
239/// Only unsigned integral types are allowed.
240///
241/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
242/// valid arguments.
243template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
244 if (ZB == ZB_Max && Val == 0)
245 return std::numeric_limits<T>::max();
246
247 // Use ^ instead of - because both gcc and llvm can remove the associated ^
248 // in the __builtin_clz intrinsic on x86.
249 return countLeadingZeros(Val, ZB_Undefined) ^
250 (std::numeric_limits<T>::digits - 1);
251}
252
253/// Macro compressed bit reversal table for 256 bits.
254///
255/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
256static const unsigned char BitReverseTable256[256] = {
257#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
258#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
259#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
260 R6(0), R6(2), R6(1), R6(3)
261#undef R2
262#undef R4
263#undef R6
264};
265
266/// Reverse the bits in \p Val.
267template <typename T>
268T reverseBits(T Val) {
269 unsigned char in[sizeof(Val)];
270 unsigned char out[sizeof(Val)];
271 std::memcpy(in, &Val, sizeof(Val));
272 for (unsigned i = 0; i < sizeof(Val); ++i)
273 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
274 std::memcpy(&Val, out, sizeof(Val));
275 return Val;
276}
277
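A quick illustrative check of the table-based reversal above:

uint8_t  R8  = llvm::reverseBits<uint8_t>(0x01);  // 0x80
uint32_t R32 = llvm::reverseBits<uint32_t>(0x1u); // 0x80000000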
278// NOTE: The following support functions use the _32/_64 extensions instead of
279// type overloading so that signed and unsigned integers can be used without
280// ambiguity.
281
282/// Return the high 32 bits of a 64 bit value.
283constexpr inline uint32_t Hi_32(uint64_t Value) {
284 return static_cast<uint32_t>(Value >> 32);
285}
286
287/// Return the low 32 bits of a 64 bit value.
288constexpr inline uint32_t Lo_32(uint64_t Value) {
289 return static_cast<uint32_t>(Value);
290}
291
292/// Make a 64-bit integer from a high / low pair of 32-bit integers.
293constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
294 return ((uint64_t)High << 32) | (uint64_t)Low;
295}
296
297/// Checks if an integer fits into the given bit width.
298template <unsigned N> constexpr inline bool isInt(int64_t x) {
299 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
300}
301// Template specializations to get better code for common cases.
302template <> constexpr inline bool isInt<8>(int64_t x) {
303 return static_cast<int8_t>(x) == x;
304}
305template <> constexpr inline bool isInt<16>(int64_t x) {
306 return static_cast<int16_t>(x) == x;
307}
308template <> constexpr inline bool isInt<32>(int64_t x) {
309 return static_cast<int32_t>(x) == x;
310}
311
312/// Checks if a signed integer is an N bit number shifted left by S.
313template <unsigned N, unsigned S>
314constexpr inline bool isShiftedInt(int64_t x) {
315 static_assert(
316 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
317 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
318 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
319}
320
321/// Checks if an unsigned integer fits into the given bit width.
322///
323/// This is written as two functions rather than as simply
324///
325/// return N >= 64 || X < (UINT64_C(1) << N);
326///
327/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
328/// left too many places.
329template <unsigned N>
330constexpr inline typename std::enable_if<(N < 64), bool>::type
331isUInt(uint64_t X) {
332 static_assert(N > 0, "isUInt<0> doesn't make sense");
333 return X < (UINT64_C(1) << (N));
334}
335template <unsigned N>
336constexpr inline typename std::enable_if<N >= 64, bool>::type
337isUInt(uint64_t X) {
338 return true;
339}
340
341// Template specializations to get better code for common cases.
342template <> constexpr inline bool isUInt<8>(uint64_t x) {
343 return static_cast<uint8_t>(x) == x;
344}
345template <> constexpr inline bool isUInt<16>(uint64_t x) {
346 return static_cast<uint16_t>(x) == x;
347}
348template <> constexpr inline bool isUInt<32>(uint64_t x) {
349 return static_cast<uint32_t>(x) == x;
350}
351
352/// Checks if an unsigned integer is an N bit number shifted left by S.
353template <unsigned N, unsigned S>
354constexpr inline bool isShiftedUInt(uint64_t x) {
355 static_assert(
356 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
357 static_assert(N + S <= 64,
358 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
359 // Per the two static_asserts above, S must be strictly less than 64. So
360 // 1 << S is not undefined behavior.
361 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
362}
363
364/// Gets the maximum value for a N-bit unsigned integer.
365inline uint64_t maxUIntN(uint64_t N) {
366 assert(N > 0 && N <= 64 && "integer width out of range");
367
368 // uint64_t(1) << 64 is undefined behavior, so we can't do
369 // (uint64_t(1) << N) - 1
370 // without checking first that N != 64. But this works and doesn't have a
371 // branch.
372 return UINT64_MAX >> (64 - N);
373}
374
375/// Gets the minimum value for a N-bit signed integer.
376inline int64_t minIntN(int64_t N) {
377 assert(N > 0 && N <= 64 && "integer width out of range");
378
379 return -(UINT64_C(1)<<(N-1));
380}
381
382/// Gets the maximum value for a N-bit signed integer.
383inline int64_t maxIntN(int64_t N) {
384 assert(N > 0 && N <= 64 && "integer width out of range");
385
386 // This relies on two's complement wraparound when N == 64, so we convert to
387 // int64_t only at the very end to avoid UB.
388 return (UINT64_C(1) << (N - 1)) - 1;
389}
390
391/// Checks if an unsigned integer fits into the given (dynamic) bit width.
392inline bool isUIntN(unsigned N, uint64_t x) {
393 return N >= 64 || x <= maxUIntN(N);
394}
395
396/// Checks if a signed integer fits into the given (dynamic) bit width.
397inline bool isIntN(unsigned N, int64_t x) {
398 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
399}
400
401/// Return true if the argument is a non-empty sequence of ones starting at the
402/// least significant bit with the remainder zero (32 bit version).
403/// Ex. isMask_32(0x0000FFFFU) == true.
404constexpr inline bool isMask_32(uint32_t Value) {
405 return Value && ((Value + 1) & Value) == 0;
406}
407
408/// Return true if the argument is a non-empty sequence of ones starting at the
409/// least significant bit with the remainder zero (64 bit version).
410constexpr inline bool isMask_64(uint64_t Value) {
411 return Value && ((Value + 1) & Value) == 0;
412}
413
414/// Return true if the argument contains a non-empty sequence of ones with the
415/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
416constexpr inline bool isShiftedMask_32(uint32_t Value) {
417 return Value && isMask_32((Value - 1) | Value);
418}
419
420/// Return true if the argument contains a non-empty sequence of ones with the
421/// remainder zero (64 bit version.)
422constexpr inline bool isShiftedMask_64(uint64_t Value) {
423 return Value && isMask_64((Value - 1) | Value);
424}
425
426/// Return true if the argument is a power of two > 0.
427/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
428constexpr inline bool isPowerOf2_32(uint32_t Value) {
429 return Value && !(Value & (Value - 1));
430}
431
432/// Return true if the argument is a power of two > 0 (64 bit edition.)
433constexpr inline bool isPowerOf2_64(uint64_t Value) {
434 return Value && !(Value & (Value - 1));
435}
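A few illustrative values for the predicates above; since they are constexpr, they can be checked at compile time:

static_assert(llvm::isMask_32(0x0000ffffu), "ones from bit 0 upward");
static_assert(llvm::isShiftedMask_32(0x0000ff00u), "a shifted run of ones");
static_assert(llvm::isPowerOf2_32(0x00100000u) && !llvm::isPowerOf2_32(0u),
              "single set bit, and zero is excluded");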
436
437/// Return a byte-swapped representation of the 16-bit argument.
438inline uint16_t ByteSwap_16(uint16_t Value) {
439 return sys::SwapByteOrder_16(Value);
440}
441
442/// Return a byte-swapped representation of the 32-bit argument.
443inline uint32_t ByteSwap_32(uint32_t Value) {
444 return sys::SwapByteOrder_32(Value);
445}
446
447/// Return a byte-swapped representation of the 64-bit argument.
448inline uint64_t ByteSwap_64(uint64_t Value) {
449 return sys::SwapByteOrder_64(Value);
450}
451
452/// Count the number of ones from the most significant bit to the first
453/// zero bit.
454///
455/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
456/// Only unsigned integral types are allowed.
457///
458/// \param ZB the behavior on an input of all ones. Only ZB_Width and
459/// ZB_Undefined are valid arguments.
460template <typename T>
461unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
462 static_assert(std::numeric_limits<T>::is_integer &&
463 !std::numeric_limits<T>::is_signed,
464 "Only unsigned integral types are allowed.");
465 return countLeadingZeros<T>(~Value, ZB);
466}
467
468/// Count the number of ones from the least significant bit to the first
469/// zero bit.
470///
471/// Ex. countTrailingOnes(0x00FF00FF) == 8.
472/// Only unsigned integral types are allowed.
473///
474/// \param ZB the behavior on an input of all ones. Only ZB_Width and
475/// ZB_Undefined are valid arguments.
476template <typename T>
477unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
478 static_assert(std::numeric_limits<T>::is_integer &&
479 !std::numeric_limits<T>::is_signed,
480 "Only unsigned integral types are allowed.");
481 return countTrailingZeros<T>(~Value, ZB);
482}
483
484namespace detail {
485template <typename T, std::size_t SizeOfT> struct PopulationCounter {
486 static unsigned count(T Value) {
487 // Generic version, forward to 32 bits.
488 static_assert(SizeOfT <= 4, "Not implemented!");
489#if __GNUC__ >= 4
490 return __builtin_popcount(Value);
491#else
492 uint32_t v = Value;
493 v = v - ((v >> 1) & 0x55555555);
494 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
495 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
496#endif
497 }
498};
499
500template <typename T> struct PopulationCounter<T, 8> {
501 static unsigned count(T Value) {
502#if __GNUC__ >= 4
503 return __builtin_popcountll(Value);
504#else
505 uint64_t v = Value;
506 v = v - ((v >> 1) & 0x5555555555555555ULL);
507 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
508 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
509 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
510#endif
511 }
512};
513} // namespace detail
514
515/// Count the number of set bits in a value.
516/// Ex. countPopulation(0xF000F000) = 8
517/// Returns 0 if the word is zero.
518template <typename T>
519inline unsigned countPopulation(T Value) {
520 static_assert(std::numeric_limits<T>::is_integer &&
521 !std::numeric_limits<T>::is_signed,
522 "Only unsigned integral types are allowed.");
523 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
524}
525
526/// Return the log base 2 of the specified value.
527inline double Log2(double Value) {
528#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
529 return __builtin_log(Value) / __builtin_log(2.0);
530#else
531 return log2(Value);
532#endif
533}
534
535/// Return the floor log base 2 of the specified value, -1 if the value is zero.
536/// (32 bit edition.)
537/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
538inline unsigned Log2_32(uint32_t Value) {
539 return 31 - countLeadingZeros(Value);
540}
541
542/// Return the floor log base 2 of the specified value, -1 if the value is zero.
543/// (64 bit edition.)
544inline unsigned Log2_64(uint64_t Value) {
545 return 63 - countLeadingZeros(Value);
546}
547
548/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
549/// (32 bit edition).
550/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
551inline unsigned Log2_32_Ceil(uint32_t Value) {
552 return 32 - countLeadingZeros(Value - 1);
553}
554
555/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
556/// (64 bit edition.)
557inline unsigned Log2_64_Ceil(uint64_t Value) {
558 return 64 - countLeadingZeros(Value - 1);
559}
560
561/// Return the greatest common divisor of the values using Euclid's algorithm.
562inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
563 while (B) {
564 uint64_t T = B;
565 B = A % B;
566 A = T;
567 }
568 return A;
569}
570
571/// This function takes a 64-bit integer and returns the bit equivalent double.
572inline double BitsToDouble(uint64_t Bits) {
573 double D;
574 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
575 memcpy(&D, &Bits, sizeof(Bits));
576 return D;
577}
578
579/// This function takes a 32-bit integer and returns the bit equivalent float.
580inline float BitsToFloat(uint32_t Bits) {
581 float F;
582 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
583 memcpy(&F, &Bits, sizeof(Bits));
584 return F;
585}
586
587/// This function takes a double and returns the bit equivalent 64-bit integer.
588/// Note that copying doubles around changes the bits of NaNs on some hosts,
589/// notably x86, so this routine cannot be used if these bits are needed.
590inline uint64_t DoubleToBits(double Double) {
591 uint64_t Bits;
592 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
593 memcpy(&Bits, &Double, sizeof(Double));
594 return Bits;
595}
596
597/// This function takes a float and returns the bit equivalent 32-bit integer.
598/// Note that copying floats around changes the bits of NaNs on some hosts,
599/// notably x86, so this routine cannot be used if these bits are needed.
600inline uint32_t FloatToBits(float Float) {
601 uint32_t Bits;
602 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
603 memcpy(&Bits, &Float, sizeof(Float));
604 return Bits;
605}
606
607/// A and B are either alignments or offsets. Return the minimum alignment that
608/// may be assumed after adding the two together.
609constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
610 // The largest power of 2 that divides both A and B.
611 //
612 // Replace "-Value" by "1+~Value" in the following commented code to avoid
613 // MSVC warning C4146
614 // return (A | B) & -(A | B);
615 return (A | B) & (1 + ~(A | B));
616}
617
618/// Aligns \c Addr to \c Alignment bytes, rounding up.
619///
620/// Alignment should be a power of two. This method rounds up, so
621/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
622inline uintptr_t alignAddr(const void *Addr, size_t Alignment) {
623 assert(Alignment && isPowerOf2_64((uint64_t)Alignment) &&
624        "Alignment is not a power of two!");
625
626 assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr);
627
628 return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1));
629}
630
631/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment
632/// bytes, rounding up.
633inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) {
634 return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
635}
636
637/// Returns the next power of two (in 64-bits) that is strictly greater than A.
638/// Returns zero on overflow.
639inline uint64_t NextPowerOf2(uint64_t A) {
640 A |= (A >> 1);
641 A |= (A >> 2);
642 A |= (A >> 4);
643 A |= (A >> 8);
644 A |= (A >> 16);
645 A |= (A >> 32);
646 return A + 1;
647}
648
649/// Returns the power of two which is less than or equal to the given value.
650/// Essentially, it is a floor operation across the domain of powers of two.
651inline uint64_t PowerOf2Floor(uint64_t A) {
652 if (!A) return 0;
653 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
654}
655
656/// Returns the power of two which is greater than or equal to the given value.
657/// Essentially, it is a ceil operation across the domain of powers of two.
658inline uint64_t PowerOf2Ceil(uint64_t A) {
659 if (!A)
660 return 0;
661 return NextPowerOf2(A - 1);
662}
663
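For instance (illustrative values): NextPowerOf2 is strictly greater than its argument, while the floor/ceil variants are inclusive:

uint64_t A = llvm::NextPowerOf2(4);  // 8 (strictly greater)
uint64_t B = llvm::PowerOf2Floor(5); // 4
uint64_t C = llvm::PowerOf2Ceil(4);  // 4 (already a power of two)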
664/// Returns the next integer (mod 2**64) that is greater than or equal to
665/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
666///
667/// If non-zero \p Skew is specified, the return value will be a minimal
668/// integer that is greater than or equal to \p Value and equal to
669/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
670/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
671///
672/// Examples:
673/// \code
674/// alignTo(5, 8) = 8
675/// alignTo(17, 8) = 24
676/// alignTo(~0LL, 8) = 0
677/// alignTo(321, 255) = 510
678///
679/// alignTo(5, 8, 7) = 7
680/// alignTo(17, 8, 1) = 17
681/// alignTo(~0LL, 8, 3) = 3
682/// alignTo(321, 255, 42) = 552
683/// \endcode
684inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
685 assert(Align != 0u && "Align can't be 0.");
686 Skew %= Align;
687 return (Value + Align - 1 - Skew) / Align * Align + Skew;
688}
689
690/// Returns the next integer (mod 2**64) that is greater than or equal to
691/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
692template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
693 static_assert(Align != 0u, "Align must be non-zero");
694 return (Value + Align - 1) / Align * Align;
695}
696
697/// Returns the integer ceil(Numerator / Denominator).
698inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
699 return alignTo(Numerator, Denominator) / Denominator;
700}
701
702/// \c alignTo for contexts where a constant expression is required.
703/// \sa alignTo
704///
705/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
706template <uint64_t Align>
707struct AlignTo {
708 static_assert(Align != 0u, "Align must be non-zero");
709 template <uint64_t Value>
710 struct from_value {
711 static const uint64_t value = (Value + Align - 1) / Align * Align;
712 };
713};
714
715/// Returns the largest uint64_t less than or equal to \p Value and is
716/// \p Skew mod \p Align. \p Align must be non-zero
717inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
718 assert(Align != 0u && "Align can't be 0.");
719 Skew %= Align;
720 return (Value - Skew) / Align * Align + Skew;
721}
722
723/// Returns the offset to the next integer (mod 2**64) that is greater than
724/// or equal to \p Value and is a multiple of \p Align. \p Align must be
725/// non-zero.
726inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
727 return alignTo(Value, Align) - Value;
728}
729
730/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
731/// Requires 0 < B <= 32.
732template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
733 static_assert(B > 0, "Bit width can't be 0.");
734 static_assert(B <= 32, "Bit width out of range.");
735 return int32_t(X << (32 - B)) >> (32 - B);
736}
737
738/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
739/// Requires 0 < B <= 32.
740inline int32_t SignExtend32(uint32_t X, unsigned B) {
741 assert(B > 0 && "Bit width can't be 0.");
742 assert(B <= 32 && "Bit width out of range.");
743 return int32_t(X << (32 - B)) >> (32 - B);
744}
745
746/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
747/// Requires 0 < B <= 64.
748template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
749 static_assert(B > 0, "Bit width can't be 0.");
750 static_assert(B <= 64, "Bit width out of range.");
751 return int64_t(x << (64 - B)) >> (64 - B);
752}
753
754/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
755/// Requires 0 < B <= 64.
756inline int64_t SignExtend64(uint64_t X, unsigned B) {
757 assert(B > 0 && "Bit width can't be 0.");
758 assert(B <= 64 && "Bit width out of range.");
759 return int64_t(X << (64 - B)) >> (64 - B);
760}
761
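A small illustrative check of the sign-extension helpers above:

int32_t A = llvm::SignExtend32<8>(0xffu);   // -1: bit 7 is treated as the sign bit
int32_t B = llvm::SignExtend32(0x80u, 8);   // -128
int64_t C = llvm::SignExtend64<12>(0x800u); // -2048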
762/// Subtract two unsigned integers, X and Y, of type T and return the absolute
763/// value of the result.
764template <typename T>
765typename std::enable_if<std::is_unsigned<T>::value, T>::type
766AbsoluteDifference(T X, T Y) {
767 return std::max(X, Y) - std::min(X, Y);
768}
769
770/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
771/// maximum representable value of T on overflow. ResultOverflowed indicates if
772/// the result is larger than the maximum representable value of type T.
773template <typename T>
774typename std::enable_if<std::is_unsigned<T>::value, T>::type
775SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
776 bool Dummy;
777 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
778 // Hacker's Delight, p. 29
779 T Z = X + Y;
780 Overflowed = (Z < X || Z < Y);
781 if (Overflowed)
782 return std::numeric_limits<T>::max();
783 else
784 return Z;
785}
786
787/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
788/// maximum representable value of T on overflow. ResultOverflowed indicates if
789/// the result is larger than the maximum representable value of type T.
790template <typename T>
791typename std::enable_if<std::is_unsigned<T>::value, T>::type
792SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
793 bool Dummy;
794 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
795
796 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
797 // because it fails for uint16_t (where multiplication can have undefined
798 // behavior due to promotion to int), and requires a division in addition
799 // to the multiplication.
800
801 Overflowed = false;
802
803 // Log2(Z) would be either Log2Z or Log2Z + 1.
804 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
805 // will necessarily be less than Log2Max as desired.
806 int Log2Z = Log2_64(X) + Log2_64(Y);
807 const T Max = std::numeric_limits<T>::max();
808 int Log2Max = Log2_64(Max);
809 if (Log2Z < Log2Max) {
810 return X * Y;
811 }
812 if (Log2Z > Log2Max) {
813 Overflowed = true;
814 return Max;
815 }
816
817 // We're going to use the top bit, and maybe overflow one
818 // bit past it. Multiply all but the bottom bit then add
819 // that on at the end.
820 T Z = (X >> 1) * Y;
821 if (Z & ~(Max >> 1)) {
822 Overflowed = true;
823 return Max;
824 }
825 Z <<= 1;
826 if (X & 1)
827 return SaturatingAdd(Z, Y, ResultOverflowed);
828
829 return Z;
830}
831
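An illustrative use of the saturating helpers above with uint8_t, where overflow is easy to trigger (sketch only):

bool Ov = false;
uint8_t S1 = llvm::SaturatingAdd<uint8_t>(200, 100, &Ov);    // 255, Ov == true
uint8_t S2 = llvm::SaturatingMultiply<uint8_t>(20, 10, &Ov); // 200, Ov == false
uint8_t S3 = llvm::SaturatingMultiply<uint8_t>(20, 20, &Ov); // 255, Ov == true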
832/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
833/// the product. Clamp the result to the maximum representable value of T on
834/// overflow. ResultOverflowed indicates if the result is larger than the
835/// maximum representable value of type T.
836template <typename T>
837typename std::enable_if<std::is_unsigned<T>::value, T>::type
838SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
839 bool Dummy;
840 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
841
842 T Product = SaturatingMultiply(X, Y, &Overflowed);
843 if (Overflowed)
844 return Product;
845
846 return SaturatingAdd(A, Product, &Overflowed);
847}
848
849/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
850extern const float huge_valf;
851} // End llvm namespace
852
853#endif