Bug Summary

File: lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
Warning: line 222, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
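
In C++, left-shifting a 32-bit unsigned value by 32 or more bits is undefined behavior, which is what this checker reports. A minimal sketch of the flagged pattern, using a hypothetical helper rather than the analyzer's exact inputs:

    // Sketch only: for a 32-bit unsigned int, Shift == 32 makes this shift undefined.
    unsigned maskAbove(unsigned Shift) {
      // A shift amount of 32 can arise from a count-trailing-zeros of 0, whose
      // result is the bit width of the argument type.
      return ~255U << Shift;
    }

The annotated path below shows countTrailingZeros returning 32 inside getThumbImmValShift and that value reaching the shift in isThumbImmShiftedVal.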

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/ARM -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/ARM -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/ARM/ARMTargetTransformInfo.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARMTargetTransformInfo.h"
11#include "ARMSubtarget.h"
12#include "MCTargetDesc/ARMAddressingModes.h"
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/Analysis/LoopInfo.h"
16#include "llvm/CodeGen/CostTable.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/ValueTypes.h"
19#include "llvm/IR/BasicBlock.h"
20#include "llvm/IR/CallSite.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DerivedTypes.h"
23#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Instructions.h"
25#include "llvm/IR/Type.h"
26#include "llvm/MC/SubtargetFeature.h"
27#include "llvm/Support/Casting.h"
28#include "llvm/Support/MachineValueType.h"
29#include "llvm/Target/TargetMachine.h"
30#include <algorithm>
31#include <cassert>
32#include <cstdint>
33#include <utility>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "armtti"
38
39bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
40 const Function *Callee) const {
41 const TargetMachine &TM = getTLI()->getTargetMachine();
42 const FeatureBitset &CallerBits =
43 TM.getSubtargetImpl(*Caller)->getFeatureBits();
44 const FeatureBitset &CalleeBits =
45 TM.getSubtargetImpl(*Callee)->getFeatureBits();
46
47 // To inline a callee, all features not in the whitelist must match exactly.
48 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
49 (CalleeBits & ~InlineFeatureWhitelist);
50 // For features in the whitelist, the callee's features must be a subset of
51 // the callers'.
52 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
53 (CalleeBits & InlineFeatureWhitelist);
54 return MatchExact && MatchSubset;
55}
56
57int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
58  assert(Ty->isIntegerTy());
59
60 unsigned Bits = Ty->getPrimitiveSizeInBits();
61 if (Bits == 0 || Imm.getActiveBits() >= 64)
8: Assuming 'Bits' is not equal to 0
9: Taking false branch
62 return 4;
63
64 int64_t SImmVal = Imm.getSExtValue();
65 uint64_t ZImmVal = Imm.getZExtValue();
66 if (!ST->isThumb()) {
10: Taking false branch
67 if ((SImmVal >= 0 && SImmVal < 65536) ||
68 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
69 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
70 return 1;
71 return ST->hasV6T2Ops() ? 2 : 3;
72 }
73 if (ST->isThumb2()) {
11: Taking false branch
74 if ((SImmVal >= 0 && SImmVal < 65536) ||
75 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
76 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
77 return 1;
78 return ST->hasV6T2Ops() ? 2 : 3;
79 }
80 // Thumb1, any i8 imm cost 1.
81 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
12: Assuming 'Bits' is not equal to 8
13: Assuming 'SImmVal' is < 0
82 return 1;
83 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
14: Calling 'isThumbImmShiftedVal'
84 return 2;
85 // Load from constantpool.
86 return 3;
87}
88
89// Constants smaller than 256 fit in the immediate field of
90// Thumb1 instructions so we return a zero cost and 1 otherwise.
91int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
92 const APInt &Imm, Type *Ty) {
93 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
94 return 0;
95
96 return 1;
97}
98
99int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
100 Type *Ty) {
101 // Division by a constant can be turned into multiplication, but only if we
102 // know it's constant. So it's not so much that the immediate is cheap (it's
103 // not), but that the alternative is worse.
104 // FIXME: this is probably unneeded with GlobalISel.
105 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1: Assuming 'Opcode' is not equal to SDiv
2: Assuming 'Opcode' is not equal to UDiv
106 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
3: Assuming 'Opcode' is not equal to SRem
4: Assuming 'Opcode' is not equal to URem
107 Idx == 1)
108 return 0;
109
110 if (Opcode == Instruction::And)
5: Assuming 'Opcode' is equal to And
6: Taking true branch
111 // Conversion to BIC is free, and means we can use ~Imm instead.
112 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
7: Calling 'ARMTTIImpl::getIntImmCost'
113
114 if (Opcode == Instruction::Add)
115 // Conversion to SUB is free, and means we can use -Imm instead.
116 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
117
118 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
119 Ty->getIntegerBitWidth() == 32) {
120 int64_t NegImm = -Imm.getSExtValue();
121 if (ST->isThumb2() && NegImm < 1<<12)
122 // icmp X, #-C -> cmn X, #C
123 return 0;
124 if (ST->isThumb() && NegImm < 1<<8)
125 // icmp X, #-C -> adds X, #C
126 return 0;
127 }
128
129 // xor a, -1 can always be folded to MVN
130 if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
131 return 0;
132
133 return getIntImmCost(Imm, Ty);
134}
135
136int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
137 const Instruction *I) {
138 int ISD = TLI->InstructionOpcodeToISD(Opcode);
139  assert(ISD && "Invalid opcode");
140
141 // Single to/from double precision conversions.
142 static const CostTblEntry NEONFltDblTbl[] = {
143 // Vector fptrunc/fpext conversions.
144 { ISD::FP_ROUND, MVT::v2f64, 2 },
145 { ISD::FP_EXTEND, MVT::v2f32, 2 },
146 { ISD::FP_EXTEND, MVT::v4f32, 4 }
147 };
148
149 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
150 ISD == ISD::FP_EXTEND)) {
151 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
152 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
153 return LT.first * Entry->Cost;
154 }
155
156 EVT SrcTy = TLI->getValueType(DL, Src);
157 EVT DstTy = TLI->getValueType(DL, Dst);
158
159 if (!SrcTy.isSimple() || !DstTy.isSimple())
160 return BaseT::getCastInstrCost(Opcode, Dst, Src);
161
162 // Some arithmetic, load and store operations have specific instructions
163 // to cast up/down their types automatically at no extra cost.
164 // TODO: Get these tables to know at least what the related operations are.
165 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
166 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
167 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
168 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
169 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
170 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
171 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
172
173 // The number of vmovl instructions for the extension.
174 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
175 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
176 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
177 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
178 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
179 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
180 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
181 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
182 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
183 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
184
185 // Operations that we legalize using splitting.
186 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
187 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
188
189 // Vector float <-> i32 conversions.
190 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
191 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
192
193 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
194 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
195 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
196 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
197 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
198 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
199 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
200 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
201 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
202 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
203 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
204 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
205 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
206 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
207 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
208 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
209 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
210 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
211 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
212 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
213
214 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
215 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
216 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
217 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
218 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
219 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
220
221 // Vector double <-> i32 conversions.
222 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
223 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
224
225 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
226 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
227 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
228 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
229 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
230 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
231
232 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
233 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
234 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
235 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
236 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
237 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
238 };
239
240 if (SrcTy.isVector() && ST->hasNEON()) {
241 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
242 DstTy.getSimpleVT(),
243 SrcTy.getSimpleVT()))
244 return Entry->Cost;
245 }
246
247 // Scalar float to integer conversions.
248 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
249 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
250 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
251 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
252 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
253 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
254 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
255 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
256 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
257 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
258 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
259 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
260 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
261 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
262 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
263 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
264 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
265 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
266 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
267 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
268 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
269 };
270 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
271 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
272 DstTy.getSimpleVT(),
273 SrcTy.getSimpleVT()))
274 return Entry->Cost;
275 }
276
277 // Scalar integer to float conversions.
278 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
279 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
280 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
281 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
282 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
283 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
284 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
285 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
286 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
287 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
288 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
289 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
290 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
291 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
292 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
293 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
294 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
295 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
296 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
297 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
298 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
299 };
300
301 if (SrcTy.isInteger() && ST->hasNEON()) {
302 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
303 ISD, DstTy.getSimpleVT(),
304 SrcTy.getSimpleVT()))
305 return Entry->Cost;
306 }
307
308 // Scalar integer conversion costs.
309 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
310 // i16 -> i64 requires two dependent operations.
311 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
312
313 // Truncates on i64 are assumed to be free.
314 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
315 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
316 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
317 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
318 };
319
320 if (SrcTy.isInteger()) {
321 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
322 DstTy.getSimpleVT(),
323 SrcTy.getSimpleVT()))
324 return Entry->Cost;
325 }
326
327 return BaseT::getCastInstrCost(Opcode, Dst, Src);
328}
329
330int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
331 unsigned Index) {
332 // Penalize inserting into an D-subregister. We end up with a three times
333 // lower estimated throughput on swift.
334 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
335 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
336 return 3;
337
338 if ((Opcode == Instruction::InsertElement ||
339 Opcode == Instruction::ExtractElement)) {
340 // Cross-class copies are expensive on many microarchitectures,
341 // so assume they are expensive by default.
342 if (ValTy->getVectorElementType()->isIntegerTy())
343 return 3;
344
345 // Even if it's not a cross class copy, this likely leads to mixing
346 // of NEON and VFP code and should be therefore penalized.
347 if (ValTy->isVectorTy() &&
348 ValTy->getScalarSizeInBits() <= 32)
349 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
350 }
351
352 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
353}
354
355int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
356 const Instruction *I) {
357 int ISD = TLI->InstructionOpcodeToISD(Opcode);
358 // On NEON a vector select gets lowered to vbsl.
359 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
360 // Lowering of some vector selects is currently far from perfect.
361 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
362 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
363 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
364 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
365 };
366
367 EVT SelCondTy = TLI->getValueType(DL, CondTy);
368 EVT SelValTy = TLI->getValueType(DL, ValTy);
369 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
370 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
371 SelCondTy.getSimpleVT(),
372 SelValTy.getSimpleVT()))
373 return Entry->Cost;
374 }
375
376 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
377 return LT.first;
378 }
379
380 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
381}
382
383int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
384 const SCEV *Ptr) {
385 // Address computations in vectorized code with non-consecutive addresses will
386 // likely result in more instructions compared to scalar code where the
387 // computation can more often be merged into the index mode. The resulting
388 // extra micro-ops can significantly decrease throughput.
389 unsigned NumVectorInstToHideOverhead = 10;
390 int MaxMergeDistance = 64;
391
392 if (Ty->isVectorTy() && SE &&
393 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
394 return NumVectorInstToHideOverhead;
395
396 // In many cases the address computation is not merged into the instruction
397 // addressing mode.
398 return 1;
399}
400
401int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
402 Type *SubTp) {
403 if (Kind == TTI::SK_Broadcast) {
404 static const CostTblEntry NEONDupTbl[] = {
405 // VDUP handles these cases.
406 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
407 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
408 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
409 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
410 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
411 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
412
413 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
414 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
415 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
416 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
417
418 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
419
420 if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
421 LT.second))
422 return LT.first * Entry->Cost;
423
424 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
425 }
426 if (Kind == TTI::SK_Reverse) {
427 static const CostTblEntry NEONShuffleTbl[] = {
428 // Reverse shuffle cost one instruction if we are shuffling within a
429 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
430 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
431 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
432 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
433 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
434 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
435 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
436
437 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
438 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
439 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
440 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
441
442 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
443
444 if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
445 LT.second))
446 return LT.first * Entry->Cost;
447
448 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
449 }
450 if (Kind == TTI::SK_Select) {
451 static const CostTblEntry NEONSelShuffleTbl[] = {
452 // Select shuffle cost table for ARM. Cost is the number of instructions
453 // required to create the shuffled vector.
454
455 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
456 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
457 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
458 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
459
460 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
461 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
462 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
463
464 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
465
466 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
467
468 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
469 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
470 ISD::VECTOR_SHUFFLE, LT.second))
471 return LT.first * Entry->Cost;
472 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
473 }
474 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
475}
476
477int ARMTTIImpl::getArithmeticInstrCost(
478 unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
479 TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
480 TTI::OperandValueProperties Opd2PropInfo,
481 ArrayRef<const Value *> Args) {
482 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
483 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
484
485 const unsigned FunctionCallDivCost = 20;
486 const unsigned ReciprocalDivCost = 10;
487 static const CostTblEntry CostTbl[] = {
488 // Division.
489 // These costs are somewhat random. Choose a cost of 20 to indicate that
490 // vectorizing devision (added function call) is going to be very expensive.
491 // Double registers types.
492 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
493 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
494 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
495 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
496 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
497 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
498 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
499 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
500 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
501 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
502 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
503 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
504 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
505 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
506 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
507 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
508 // Quad register types.
509 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
510 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
511 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
512 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
513 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
514 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
515 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
516 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
517 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
518 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
519 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
520 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
521 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
522 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
523 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
524 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
525 // Multiplication.
526 };
527
528 if (ST->hasNEON())
529 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
530 return LT.first * Entry->Cost;
531
532 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
533 Opd1PropInfo, Opd2PropInfo);
534
535 // This is somewhat of a hack. The problem that we are facing is that SROA
536 // creates a sequence of shift, and, or instructions to construct values.
537 // These sequences are recognized by the ISel and have zero-cost. Not so for
538 // the vectorized code. Because we have support for v2i64 but not i64 those
539 // sequences look particularly beneficial to vectorize.
540 // To work around this we increase the cost of v2i64 operations to make them
541 // seem less beneficial.
542 if (LT.second == MVT::v2i64 &&
543 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
544 Cost += 4;
545
546 return Cost;
547}
548
549int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
550 unsigned AddressSpace, const Instruction *I) {
551 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
552
553 if (Src->isVectorTy() && Alignment != 16 &&
554 Src->getVectorElementType()->isDoubleTy()) {
555 // Unaligned loads/stores are extremely inefficient.
556 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
557 return LT.first * 4;
558 }
559 return LT.first;
560}
561
562int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
563 unsigned Factor,
564 ArrayRef<unsigned> Indices,
565 unsigned Alignment,
566 unsigned AddressSpace,
567 bool IsMasked) {
568  assert(Factor >= 2 && "Invalid interleave factor");
569  assert(isa<VectorType>(VecTy) && "Expect a vector type");
570
571 // vldN/vstN doesn't support vector types of i64/f64 element.
572 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
573
574 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
575 !IsMasked) {
576 unsigned NumElts = VecTy->getVectorNumElements();
577 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
578
579 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
580 // Accesses having vector types that are a multiple of 128 bits can be
581 // matched to more than one vldN/vstN instruction.
582 if (NumElts % Factor == 0 &&
583 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
584 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
585 }
586
587 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
588 Alignment, AddressSpace, IsMasked);
589}
590
591void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
592 TTI::UnrollingPreferences &UP) {
593 // Only currently enable these preferences for M-Class cores.
594 if (!ST->isMClass())
595 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
596
597 // Disable loop unrolling for Oz and Os.
598 UP.OptSizeThreshold = 0;
599 UP.PartialOptSizeThreshold = 0;
600 if (L->getHeader()->getParent()->optForSize())
601 return;
602
603 // Only enable on Thumb-2 targets.
604 if (!ST->isThumb2())
605 return;
606
607 SmallVector<BasicBlock*, 4> ExitingBlocks;
608 L->getExitingBlocks(ExitingBlocks);
609  LLVM_DEBUG(dbgs() << "Loop has:\n"
610                    << "Blocks: " << L->getNumBlocks() << "\n"
611                    << "Exit blocks: " << ExitingBlocks.size() << "\n");
612
613 // Only allow another exit other than the latch. This acts as an early exit
614 // as it mirrors the profitability calculation of the runtime unroller.
615 if (ExitingBlocks.size() > 2)
616 return;
617
618 // Limit the CFG of the loop body for targets with a branch predictor.
619 // Allowing 4 blocks permits if-then-else diamonds in the body.
620 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
621 return;
622
623 // Scan the loop: don't unroll loops with calls as this could prevent
624 // inlining.
625 unsigned Cost = 0;
626 for (auto *BB : L->getBlocks()) {
627 for (auto &I : *BB) {
628 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
629 ImmutableCallSite CS(&I);
630 if (const Function *F = CS.getCalledFunction()) {
631 if (!isLoweredToCall(F))
632 continue;
633 }
634 return;
635 }
636 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
637 I.value_op_end());
638 Cost += getUserCost(&I, Operands);
639 }
640 }
641
642  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
643
644 UP.Partial = true;
645 UP.Runtime = true;
646 UP.UnrollRemainder = true;
647 UP.DefaultUnrollRuntimeCount = 4;
648 UP.UnrollAndJam = true;
649 UP.UnrollAndJamInnerLoopThreshold = 60;
650
651 // Force unrolling small loops can be very useful because of the branch
652 // taken cost of the backedge.
653 if (Cost < 12)
654 UP.Force = true;
655}

/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the ARM addressing mode implementation stuff.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/APInt.h"
19#include "llvm/ADT/bit.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MathExtras.h"
22#include <cassert>
23
24namespace llvm {
25
26/// ARM_AM - ARM Addressing Mode Stuff
27namespace ARM_AM {
28 enum ShiftOpc {
29 no_shift = 0,
30 asr,
31 lsl,
32 lsr,
33 ror,
34 rrx
35 };
36
37 enum AddrOpc {
38 sub = 0,
39 add
40 };
41
42 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
43
44 inline const char *getShiftOpcStr(ShiftOpc Op) {
45 switch (Op) {
46    default: llvm_unreachable("Unknown shift opc!");
47 case ARM_AM::asr: return "asr";
48 case ARM_AM::lsl: return "lsl";
49 case ARM_AM::lsr: return "lsr";
50 case ARM_AM::ror: return "ror";
51 case ARM_AM::rrx: return "rrx";
52 }
53 }
54
55 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
56 switch (Op) {
57    default: llvm_unreachable("Unknown shift opc!");
58 case ARM_AM::asr: return 2;
59 case ARM_AM::lsl: return 0;
60 case ARM_AM::lsr: return 1;
61 case ARM_AM::ror: return 3;
62 }
63 }
64
65 enum AMSubMode {
66 bad_am_submode = 0,
67 ia,
68 ib,
69 da,
70 db
71 };
72
73 inline const char *getAMSubModeStr(AMSubMode Mode) {
74 switch (Mode) {
75    default: llvm_unreachable("Unknown addressing sub-mode!");
76 case ARM_AM::ia: return "ia";
77 case ARM_AM::ib: return "ib";
78 case ARM_AM::da: return "da";
79 case ARM_AM::db: return "db";
80 }
81 }
82
83 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
84 ///
85 inline unsigned rotr32(unsigned Val, unsigned Amt) {
86    assert(Amt < 32 && "Invalid rotate amount");
87 return (Val >> Amt) | (Val << ((32-Amt)&31));
88 }
89
90 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
91 ///
92 inline unsigned rotl32(unsigned Val, unsigned Amt) {
93    assert(Amt < 32 && "Invalid rotate amount");
94 return (Val << Amt) | (Val >> ((32-Amt)&31));
95 }
96
97 //===--------------------------------------------------------------------===//
98 // Addressing Mode #1: shift_operand with registers
99 //===--------------------------------------------------------------------===//
100 //
101 // This 'addressing mode' is used for arithmetic instructions. It can
102 // represent things like:
103 // reg
104 // reg [asr|lsl|lsr|ror|rrx] reg
105 // reg [asr|lsl|lsr|ror|rrx] imm
106 //
107 // This is stored three operands [rega, regb, opc]. The first is the base
108 // reg, the second is the shift amount (or reg0 if not present or imm). The
109 // third operand encodes the shift opcode and the imm if a reg isn't present.
110 //
111 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
112 return ShOp | (Imm << 3);
113 }
114 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
115 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
116
117 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
118 /// the 8-bit imm value.
119 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
120 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
121 /// the rotate amount.
122 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
123
124 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
125 /// computing the rotate amount to use. If this immediate value cannot be
126 /// handled with a single shifter-op, determine a good rotate amount that will
127 /// take a maximal chunk of bits out of the immediate.
128 inline unsigned getSOImmValRotate(unsigned Imm) {
129 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
130 // of zero.
131 if ((Imm & ~255U) == 0) return 0;
132
133 // Use CTZ to compute the rotate amount.
134 unsigned TZ = countTrailingZeros(Imm);
135
136 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
137 // not 9.
138 unsigned RotAmt = TZ & ~1;
139
140 // If we can handle this spread, return it.
141 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
142 return (32-RotAmt)&31; // HW rotates right, not left.
143
144 // For values like 0xF000000F, we should ignore the low 6 bits, then
145 // retry the hunt.
146 if (Imm & 63U) {
147 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
148 unsigned RotAmt2 = TZ2 & ~1;
149 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
150 return (32-RotAmt2)&31; // HW rotates right, not left.
151 }
152
153 // Otherwise, we have no way to cover this span of bits with a single
154 // shifter_op immediate. Return a chunk of bits that will be useful to
155 // handle.
156 return (32-RotAmt)&31; // HW rotates right, not left.
157 }
158
159 /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
160 /// into an shifter_operand immediate operand, return the 12-bit encoding for
161 /// it. If not, return -1.
162 inline int getSOImmVal(unsigned Arg) {
163 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
164 // of zero.
165 if ((Arg & ~255U) == 0) return Arg;
166
167 unsigned RotAmt = getSOImmValRotate(Arg);
168
169 // If this cannot be handled with a single shifter_op, bail out.
170 if (rotr32(~255U, RotAmt) & Arg)
171 return -1;
172
173 // Encode this correctly.
174 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
175 }
176
177 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
178 /// or'ing together two SOImmVal's.
179 inline bool isSOImmTwoPartVal(unsigned V) {
180 // If this can be handled with a single shifter_op, bail out.
181 V = rotr32(~255U, getSOImmValRotate(V)) & V;
182 if (V == 0)
183 return false;
184
185 // If this can be handled with two shifter_op's, accept.
186 V = rotr32(~255U, getSOImmValRotate(V)) & V;
187 return V == 0;
188 }
189
190 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
191 /// return the first chunk of it.
192 inline unsigned getSOImmTwoPartFirst(unsigned V) {
193 return rotr32(255U, getSOImmValRotate(V)) & V;
194 }
195
196 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
197 /// return the second chunk of it.
198 inline unsigned getSOImmTwoPartSecond(unsigned V) {
199 // Mask out the first hunk.
200 V = rotr32(~255U, getSOImmValRotate(V)) & V;
201
202 // Take what's left.
203   assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
204 return V;
205 }
206
207 /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
208 /// by a left shift. Returns the shift amount to use.
209 inline unsigned getThumbImmValShift(unsigned Imm) {
210 // 8-bit (or less) immediates are trivially immediate operand with a shift
211 // of zero.
212 if ((Imm & ~255U) == 0) return 0;
16: Assuming the condition is false
17: Taking false branch
213
214 // Use CTZ to compute the shift amount.
215 return countTrailingZeros(Imm);
18: Calling 'countTrailingZeros<unsigned int>'
25: Returning from 'countTrailingZeros<unsigned int>'
26: Returning the value 32
216 }
217
218 /// isThumbImmShiftedVal - Return true if the specified value can be obtained
219 /// by left shifting a 8-bit immediate.
220 inline bool isThumbImmShiftedVal(unsigned V) {
221 // If this can be handled with
222 V = (~255U << getThumbImmValShift(V)) & V;
15: Calling 'getThumbImmValShift'
27: Returning from 'getThumbImmValShift'
28: The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
223 return V == 0;
224 }
225
226 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
227 /// by a left shift. Returns the shift amount to use.
228 inline unsigned getThumbImm16ValShift(unsigned Imm) {
229 // 16-bit (or less) immediates are trivially immediate operand with a shift
230 // of zero.
231 if ((Imm & ~65535U) == 0) return 0;
232
233 // Use CTZ to compute the shift amount.
234 return countTrailingZeros(Imm);
235 }
236
237 /// isThumbImm16ShiftedVal - Return true if the specified value can be
238 /// obtained by left shifting a 16-bit immediate.
239 inline bool isThumbImm16ShiftedVal(unsigned V) {
240 // If this can be handled with
241 V = (~65535U << getThumbImm16ValShift(V)) & V;
242 return V == 0;
243 }
244
245 /// getThumbImmNonShiftedVal - If V is a value that satisfies
246 /// isThumbImmShiftedVal, return the non-shiftd value.
247 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
248 return V >> getThumbImmValShift(V);
249 }
250
251
252 /// getT2SOImmValSplat - Return the 12-bit encoded representation
253 /// if the specified value can be obtained by splatting the low 8 bits
254 /// into every other byte or every byte of a 32-bit value. i.e.,
255 /// 00000000 00000000 00000000 abcdefgh control = 0
256 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
257 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
258 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
259 /// Return -1 if none of the above apply.
260 /// See ARM Reference Manual A6.3.2.
261 inline int getT2SOImmValSplatVal(unsigned V) {
262 unsigned u, Vs, Imm;
263 // control = 0
264 if ((V & 0xffffff00) == 0)
265 return V;
266
267 // If the value is zeroes in the first byte, just shift those off
268 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
269 // Any passing value only has 8 bits of payload, splatted across the word
270 Imm = Vs & 0xff;
271 // Likewise, any passing values have the payload splatted into the 3rd byte
272 u = Imm | (Imm << 16);
273
274 // control = 1 or 2
275 if (Vs == u)
276 return (((Vs == V) ? 1 : 2) << 8) | Imm;
277
278 // control = 3
279 if (Vs == (u | (u << 8)))
280 return (3 << 8) | Imm;
281
282 return -1;
283 }
284
285 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
286 /// specified value is a rotated 8-bit value. Return -1 if no rotation
287 /// encoding is possible.
288 /// See ARM Reference Manual A6.3.2.
289 inline int getT2SOImmValRotateVal(unsigned V) {
290 unsigned RotAmt = countLeadingZeros(V);
291 if (RotAmt >= 24)
292 return -1;
293
294 // If 'Arg' can be handled with a single shifter_op return the value.
295 if ((rotr32(0xff000000U, RotAmt) & V) == V)
296 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
297
298 return -1;
299 }
300
301 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
302 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
303 /// encoding for it. If not, return -1.
304 /// See ARM Reference Manual A6.3.2.
305 inline int getT2SOImmVal(unsigned Arg) {
306 // If 'Arg' is an 8-bit splat, then get the encoded value.
307 int Splat = getT2SOImmValSplatVal(Arg);
308 if (Splat != -1)
309 return Splat;
310
311 // If 'Arg' can be handled with a single shifter_op return the value.
312 int Rot = getT2SOImmValRotateVal(Arg);
313 if (Rot != -1)
314 return Rot;
315
316 return -1;
317 }
318
319 inline unsigned getT2SOImmValRotate(unsigned V) {
320 if ((V & ~255U) == 0) return 0;
321 // Use CTZ to compute the rotate amount.
322 unsigned RotAmt = countTrailingZeros(V);
323 return (32 - RotAmt) & 31;
324 }
325
326 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
327 unsigned V = Imm;
328 // Passing values can be any combination of splat values and shifter
329 // values. If this can be handled with a single shifter or splat, bail
330 // out. Those should be handled directly, not with a two-part val.
331 if (getT2SOImmValSplatVal(V) != -1)
332 return false;
333 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
334 if (V == 0)
335 return false;
336
337 // If this can be handled as an immediate, accept.
338 if (getT2SOImmVal(V) != -1) return true;
339
340 // Likewise, try masking out a splat value first.
341 V = Imm;
342 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
343 V &= ~0xff00ff00U;
344 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
345 V &= ~0x00ff00ffU;
346 // If what's left can be handled as an immediate, accept.
347 if (getT2SOImmVal(V) != -1) return true;
348
349 // Otherwise, do not accept.
350 return false;
351 }
352
353 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
354   assert (isT2SOImmTwoPartVal(Imm) &&
355           "Immedate cannot be encoded as two part immediate!");
356 // Try a shifter operand as one part
357 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
358 // If the rest is encodable as an immediate, then return it.
359 if (getT2SOImmVal(V) != -1) return V;
360
361 // Try masking out a splat value first.
362 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
363 return Imm & 0xff00ff00U;
364
365 // The other splat is all that's left as an option.
366   assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
367 return Imm & 0x00ff00ffU;
368 }
369
370 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
371 // Mask out the first hunk
372 Imm ^= getT2SOImmTwoPartFirst(Imm);
373 // Return what's left
374   assert (getT2SOImmVal(Imm) != -1 &&
375           "Unable to encode second part of T2 two part SO immediate");
376 return Imm;
377 }
378
379
380 //===--------------------------------------------------------------------===//
381 // Addressing Mode #2
382 //===--------------------------------------------------------------------===//
383 //
384 // This is used for most simple load/store instructions.
385 //
386 // addrmode2 := reg +/- reg shop imm
387 // addrmode2 := reg +/- imm12
388 //
389 // The first operand is always a Reg. The second operand is a reg if in
390 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
391 // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
392 // fourth operand 16-17 encodes the index mode.
393 //
394 // If this addressing mode is a frame index (before prolog/epilog insertion
395 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
396 // with no shift amount for the frame offset.
397 //
398 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
399 unsigned IdxMode = 0) {
400   assert(Imm12 < (1 << 12) && "Imm too large!");
401 bool isSub = Opc == sub;
402 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
403 }
404 inline unsigned getAM2Offset(unsigned AM2Opc) {
405 return AM2Opc & ((1 << 12)-1);
406 }
407 inline AddrOpc getAM2Op(unsigned AM2Opc) {
408 return ((AM2Opc >> 12) & 1) ? sub : add;
409 }
410 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
411 return (ShiftOpc)((AM2Opc >> 13) & 7);
412 }
413 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
414
415 //===--------------------------------------------------------------------===//
416 // Addressing Mode #3
417 //===--------------------------------------------------------------------===//
418 //
419 // This is used for sign-extending loads, and load/store-pair instructions.
420 //
421 // addrmode3 := reg +/- reg
422 // addrmode3 := reg +/- imm8
423 //
424 // The first operand is always a Reg. The second operand is a reg if in
425 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
426 // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
427 // index mode.
428
429 /// getAM3Opc - This function encodes the addrmode3 opc field.
430 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
431 unsigned IdxMode = 0) {
432 bool isSub = Opc == sub;
433 return ((int)isSub << 8) | Offset | (IdxMode << 9);
434 }
435 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
436 inline AddrOpc getAM3Op(unsigned AM3Opc) {
437 return ((AM3Opc >> 8) & 1) ? sub : add;
438 }
439 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
440
441 //===--------------------------------------------------------------------===//
442 // Addressing Mode #4
443 //===--------------------------------------------------------------------===//
444 //
445 // This is used for load / store multiple instructions.
446 //
447 // addrmode4 := reg, <mode>
448 //
449 // The four modes are:
450 // IA - Increment after
451 // IB - Increment before
452 // DA - Decrement after
453 // DB - Decrement before
454 // For VFP instructions, only the IA and DB modes are valid.
455
456 inline AMSubMode getAM4SubMode(unsigned Mode) {
457 return (AMSubMode)(Mode & 0x7);
458 }
459
460 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
461
462 //===--------------------------------------------------------------------===//
463 // Addressing Mode #5
464 //===--------------------------------------------------------------------===//
465 //
466 // This is used for coprocessor instructions, such as FP load/stores.
467 //
468 // addrmode5 := reg +/- imm8*4
469 //
470 // The first operand is always a Reg. The second operand encodes the
471 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
472
473 /// getAM5Opc - This function encodes the addrmode5 opc field.
474 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
475 bool isSub = Opc == sub;
476 return ((int)isSub << 8) | Offset;
477 }
478 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
479 inline AddrOpc getAM5Op(unsigned AM5Opc) {
480 return ((AM5Opc >> 8) & 1) ? sub : add;
481 }
482
483 //===--------------------------------------------------------------------===//
484 // Addressing Mode #5 FP16
485 //===--------------------------------------------------------------------===//
486 //
487 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
488 //
489 // addrmode5fp16 := reg +/- imm8*2
490 //
491 // The first operand is always a Reg. The second operand encodes the
492 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
493
494 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
495 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
496 bool isSub = Opc == sub;
497 return ((int)isSub << 8) | Offset;
498 }
499 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
500 return AM5Opc & 0xFF;
501 }
502 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
503 return ((AM5Opc >> 8) & 1) ? sub : add;
504 }
505
506 //===--------------------------------------------------------------------===//
507 // Addressing Mode #6
508 //===--------------------------------------------------------------------===//
509 //
510 // This is used for NEON load / store instructions.
511 //
512 // addrmode6 := reg with optional alignment
513 //
514 // This is stored in two operands [regaddr, align]. The first is the
515 // address register. The second operand is the value of the alignment
516 // specifier in bytes or zero if no explicit alignment.
517 // Valid alignments depend on the specific instruction.
518
519 //===--------------------------------------------------------------------===//
520 // NEON Modified Immediates
521 //===--------------------------------------------------------------------===//
522 //
523 // Several NEON instructions (e.g., VMOV) take a "modified immediate"
524 // vector operand, where a small immediate encoded in the instruction
525 // specifies a full NEON vector value. These modified immediates are
526 // represented here as encoded integers. The low 8 bits hold the immediate
527 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
528 // the "Cmode" field of the instruction. The interfaces below treat the
529 // Op and Cmode values as a single 5-bit value.
530
531 inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
532 return (OpCmode << 8) | Val;
533 }
534 inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
535 return (ModImm >> 8) & 0x1f;
536 }
537 inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; }
538
539 /// decodeNEONModImm - Decode a NEON modified immediate value into the
540 /// element value and the element size in bits. (If the element size is
541 /// smaller than the vector, it is splatted into all the elements.)
542 inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
543 unsigned OpCmode = getNEONModImmOpCmode(ModImm);
544 unsigned Imm8 = getNEONModImmVal(ModImm);
545 uint64_t Val = 0;
546
547 if (OpCmode == 0xe) {
548 // 8-bit vector elements
549 Val = Imm8;
550 EltBits = 8;
551 } else if ((OpCmode & 0xc) == 0x8) {
552 // 16-bit vector elements
553 unsigned ByteNum = (OpCmode & 0x6) >> 1;
554 Val = Imm8 << (8 * ByteNum);
555 EltBits = 16;
556 } else if ((OpCmode & 0x8) == 0) {
557 // 32-bit vector elements, zero with one byte set
558 unsigned ByteNum = (OpCmode & 0x6) >> 1;
559 Val = Imm8 << (8 * ByteNum);
560 EltBits = 32;
561 } else if ((OpCmode & 0xe) == 0xc) {
562 // 32-bit vector elements, one byte with low bits set
563 unsigned ByteNum = 1 + (OpCmode & 0x1);
564 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
565 EltBits = 32;
566 } else if (OpCmode == 0x1e) {
567 // 64-bit vector elements
568 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
569 if ((ModImm >> ByteNum) & 1)
570 Val |= (uint64_t)0xff << (8 * ByteNum);
571 }
572 EltBits = 64;
573 } else {
574     llvm_unreachable("Unsupported NEON immediate");
575 }
576 return Val;
577 }
578
579 // Generic validation for single-byte immediate (0X00, 00X0, etc).
580 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
581   assert(Size >= 1 && Size <= 4 && "Invalid size");
582 unsigned count = 0;
583 for (unsigned i = 0; i < Size; ++i) {
584 if (Value & 0xff) count++;
585 Value >>= 8;
586 }
587 return count == 1;
588 }
589
590 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
591 inline bool isNEONi16splat(unsigned Value) {
592 if (Value > 0xffff)
593 return false;
594 // i16 value with set bits only in one byte X0 or 0X.
595 return Value == 0 || isNEONBytesplat(Value, 2);
596 }
597
598 // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR
599 inline unsigned encodeNEONi16splat(unsigned Value) {
600   assert(isNEONi16splat(Value) && "Invalid NEON splat value");
601 if (Value >= 0x100)
602 Value = (Value >> 8) | 0xa00;
603 else
604 Value |= 0x800;
605 return Value;
606 }
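// Illustrative sketch: the byte position selects between the two Cmode patterns.
//   encodeNEONi16splat(0x00ab);  // == 0x8ab (byte in the low half)
//   encodeNEONi16splat(0xab00);  // == 0xaab (byte in the high half)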
607
608 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
609 inline bool isNEONi32splat(unsigned Value) {
610 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
611 return Value == 0 || isNEONBytesplat(Value, 4);
612 }
613
614 /// Encode a NEON 32-bit splat immediate for instructions like VBIC/VORR.
615 inline unsigned encodeNEONi32splat(unsigned Value) {
616 assert(isNEONi32splat(Value) && "Invalid NEON splat value");
617 if (Value >= 0x100 && Value <= 0xff00)
618 Value = (Value >> 8) | 0x200;
619 else if (Value > 0xffff && Value <= 0xff0000)
620 Value = (Value >> 16) | 0x400;
621 else if (Value > 0xffffff)
622 Value = (Value >> 24) | 0x600;
623 return Value;
624 }
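// Illustrative sketch: each byte position maps to a different Cmode pattern.
//   encodeNEONi32splat(0x000000ab);  // == 0x0ab
//   encodeNEONi32splat(0x0000ab00);  // == 0x2ab
//   encodeNEONi32splat(0x00ab0000);  // == 0x4ab
//   encodeNEONi32splat(0xab000000);  // == 0x6ab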
625
626 //===--------------------------------------------------------------------===//
627 // Floating-point Immediates
628 //
629 inline float getFPImmFloat(unsigned Imm) {
630 // We expect an 8-bit binary encoding of a floating-point number here.
631
632 uint8_t Sign = (Imm >> 7) & 0x1;
633 uint8_t Exp = (Imm >> 4) & 0x7;
634 uint8_t Mantissa = Imm & 0xf;
635
636 // 8-bit FP IEEE Float Encoding
637 // abcd efgh aBbbbbbc defgh000 00000000 00000000
638 //
639 // where B = NOT(b);
640 uint32_t I = 0;
641 I |= Sign << 31;
642 I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
643 I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
644 I |= (Exp & 0x3) << 23;
645 I |= Mantissa << 19;
646 return bit_cast<float>(I);
647 }
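// Illustrative sketch of a few decoded values (following the expansion above):
//   getFPImmFloat(0x70);  // == 1.0f  (sign 0, exp field 0b111, mantissa 0)
//   getFPImmFloat(0x00);  // == 2.0f  (sign 0, exp field 0b000, mantissa 0)
//   getFPImmFloat(0xf0);  // == -1.0f (sign bit set)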
648
649 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
650 /// floating-point value. If the value cannot be represented as an 8-bit
651 /// floating-point value, then return -1.
652 inline int getFP16Imm(const APInt &Imm) {
653 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
654 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
655 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
656
657 // We can handle 4 bits of mantissa.
658 // mantissa = (16+UInt(e:f:g:h))/16.
659 if (Mantissa & 0x3f)
660 return -1;
661 Mantissa >>= 6;
662
663 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
664 if (Exp < -3 || Exp > 4)
665 return -1;
666 Exp = ((Exp+3) & 0x7) ^ 4;
667
668 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
669 }
670
671 inline int getFP16Imm(const APFloat &FPImm) {
672 return getFP16Imm(FPImm.bitcastToAPInt());
673 }
674
675 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
676 /// floating-point value. If the value cannot be represented as an 8-bit
677 /// floating-point value, then return -1.
678 inline int getFP32Imm(const APInt &Imm) {
679 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
680 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
681 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
682
683 // We can handle 4 bits of mantissa.
684 // mantissa = (16+UInt(e:f:g:h))/16.
685 if (Mantissa & 0x7ffff)
686 return -1;
687 Mantissa >>= 19;
688 if ((Mantissa & 0xf) != Mantissa)
689 return -1;
690
691 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
692 if (Exp < -3 || Exp > 4)
693 return -1;
694 Exp = ((Exp+3) & 0x7) ^ 4;
695
696 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
697 }
698
699 inline int getFP32Imm(const APFloat &FPImm) {
700 return getFP32Imm(FPImm.bitcastToAPInt());
701 }
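// Illustrative sketch (the inverse of getFPImmFloat for representable values):
//   getFP32Imm(APFloat(1.0f));  // == 0x70
//   getFP32Imm(APFloat(0.1f));  // == -1 (not representable in the 8-bit format)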
702
703 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
704 /// floating-point value. If the value cannot be represented as an 8-bit
705 /// floating-point value, then return -1.
706 inline int getFP64Imm(const APInt &Imm) {
707 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
708 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
709 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
710
711 // We can handle 4 bits of mantissa.
712 // mantissa = (16+UInt(e:f:g:h))/16.
713 if (Mantissa & 0xffffffffffffULL)
714 return -1;
715 Mantissa >>= 48;
716 if ((Mantissa & 0xf) != Mantissa)
717 return -1;
718
719 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
720 if (Exp < -3 || Exp > 4)
721 return -1;
722 Exp = ((Exp+3) & 0x7) ^ 4;
723
724 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
725 }
726
727 inline int getFP64Imm(const APFloat &FPImm) {
728 return getFP64Imm(FPImm.bitcastToAPInt());
729 }
730
731} // end namespace ARM_AM
732} // end namespace llvm
733
734#endif
735

/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains some functions that are useful for math stuff.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_SUPPORT_MATHEXTRAS_H
15#define LLVM_SUPPORT_MATHEXTRAS_H
16
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/SwapByteOrder.h"
19#include <algorithm>
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <limits>
24#include <type_traits>
25
26#ifdef __ANDROID_NDK__
27#include <android/api-level.h>
28#endif
29
30#ifdef _MSC_VER
31// Declare these intrinsics manually rather including intrin.h. It's very
32// expensive, and MathExtras.h is popular.
33// #include <intrin.h>
34extern "C" {
35unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
36unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
37unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
38unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
39}
40#endif
41
42namespace llvm {
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53namespace detail {
54template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
55 static std::size_t count(T Val, ZeroBehavior) {
56 if (!Val)
57 return std::numeric_limits<T>::digits;
58 if (Val & 0x1)
59 return 0;
60
61 // Bisection method.
62 std::size_t ZeroBits = 0;
63 T Shift = std::numeric_limits<T>::digits >> 1;
64 T Mask = std::numeric_limits<T>::max() >> Shift;
65 while (Shift) {
66 if ((Val & Mask) == 0) {
67 Val >>= Shift;
68 ZeroBits |= Shift;
69 }
70 Shift >>= 1;
71 Mask >>= Shift;
72 }
73 return ZeroBits;
74 }
75};
76
77#if __GNUC__ >= 4 || defined(_MSC_VER)
78template <typename T> struct TrailingZerosCounter<T, 4> {
79 static std::size_t count(T Val, ZeroBehavior ZB) {
80 if (ZB != ZB_Undefined && Val == 0)
20
Assuming 'Val' is equal to 0
21
Taking true branch
81 return 32;
22
Returning the value 32
82
83#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
84 return __builtin_ctz(Val);
85#elif defined(_MSC_VER)
86 unsigned long Index;
87 _BitScanForward(&Index, Val);
88 return Index;
89#endif
90 }
91};
92
93#if !defined(_MSC_VER) || defined(_M_X64)
94template <typename T> struct TrailingZerosCounter<T, 8> {
95 static std::size_t count(T Val, ZeroBehavior ZB) {
96 if (ZB != ZB_Undefined && Val == 0)
97 return 64;
98
99#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
100 return __builtin_ctzll(Val);
101#elif defined(_MSC_VER)
102 unsigned long Index;
103 _BitScanForward64(&Index, Val);
104 return Index;
105#endif
106 }
107};
108#endif
109#endif
110} // namespace detail
111
112/// Count number of 0's from the least significant bit to the most
113/// stopping at the first 1.
114///
115/// Only unsigned integral types are allowed.
116///
117/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
118/// valid arguments.
119template <typename T>
120std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
121 static_assert(std::numeric_limits<T>::is_integer &&
122 !std::numeric_limits<T>::is_signed,
123 "Only unsigned integral types are allowed.");
124 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
19
Calling 'TrailingZerosCounter::count'
23
Returning from 'TrailingZerosCounter::count'
24
Returning the value 32
125}
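// Illustrative sketch: with the default ZB_Width behavior a zero input yields the
// full bit width, which is what the path above records for a 32-bit value of 0.
//   countTrailingZeros(uint32_t(0x8));              // == 3
//   countTrailingZeros(uint32_t(0));                // == 32 (ZB_Width default)
//   countTrailingZeros(uint32_t(0), ZB_Undefined);  // result is undefined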
126
127namespace detail {
128template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
129 static std::size_t count(T Val, ZeroBehavior) {
130 if (!Val)
131 return std::numeric_limits<T>::digits;
132
133 // Bisection method.
134 std::size_t ZeroBits = 0;
135 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
136 T Tmp = Val >> Shift;
137 if (Tmp)
138 Val = Tmp;
139 else
140 ZeroBits |= Shift;
141 }
142 return ZeroBits;
143 }
144};
145
146#if __GNUC__ >= 4 || defined(_MSC_VER)
147template <typename T> struct LeadingZerosCounter<T, 4> {
148 static std::size_t count(T Val, ZeroBehavior ZB) {
149 if (ZB != ZB_Undefined && Val == 0)
150 return 32;
151
152#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
153 return __builtin_clz(Val);
154#elif defined(_MSC_VER)
155 unsigned long Index;
156 _BitScanReverse(&Index, Val);
157 return Index ^ 31;
158#endif
159 }
160};
161
162#if !defined(_MSC_VER) || defined(_M_X64)
163template <typename T> struct LeadingZerosCounter<T, 8> {
164 static std::size_t count(T Val, ZeroBehavior ZB) {
165 if (ZB != ZB_Undefined && Val == 0)
166 return 64;
167
168#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
169 return __builtin_clzll(Val);
170#elif defined(_MSC_VER)
171 unsigned long Index;
172 _BitScanReverse64(&Index, Val);
173 return Index ^ 63;
174#endif
175 }
176};
177#endif
178#endif
179} // namespace detail
180
181/// Count number of 0's from the most significant bit to the least
182/// stopping at the first 1.
183///
184/// Only unsigned integral types are allowed.
185///
186/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
187/// valid arguments.
188template <typename T>
189std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
190 static_assert(std::numeric_limits<T>::is_integer &&
191 !std::numeric_limits<T>::is_signed,
192 "Only unsigned integral types are allowed.");
193 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
194}
195
196/// Get the index of the first set bit starting from the least
197/// significant bit.
198///
199/// Only unsigned integral types are allowed.
200///
201/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
202/// valid arguments.
203template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
204 if (ZB == ZB_Max && Val == 0)
205 return std::numeric_limits<T>::max();
206
207 return countTrailingZeros(Val, ZB_Undefined);
208}
209
210/// Create a bitmask with the N right-most bits set to 1, and all other
211/// bits set to 0. Only unsigned types are allowed.
212template <typename T> T maskTrailingOnes(unsigned N) {
213 static_assert(std::is_unsigned<T>::value, "Invalid type!");
214 const unsigned Bits = CHAR_BIT * sizeof(T);
215 assert(N <= Bits && "Invalid bit index");
216 return N == 0 ? 0 : (T(-1) >> (Bits - N));
217}
218
219/// Create a bitmask with the N left-most bits set to 1, and all other
220/// bits set to 0. Only unsigned types are allowed.
221template <typename T> T maskLeadingOnes(unsigned N) {
222 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
223}
224
225/// Create a bitmask with the N right-most bits set to 0, and all other
226/// bits set to 1. Only unsigned types are allowed.
227template <typename T> T maskTrailingZeros(unsigned N) {
228 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
229}
230
231/// Create a bitmask with the N left-most bits set to 0, and all other
232/// bits set to 1. Only unsigned types are allowed.
233template <typename T> T maskLeadingZeros(unsigned N) {
234 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
235}
236
237/// Get the index of the last set bit starting from the least
238/// significant bit.
239///
240/// Only unsigned integral types are allowed.
241///
242/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
243/// valid arguments.
244template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
245 if (ZB == ZB_Max && Val == 0)
246 return std::numeric_limits<T>::max();
247
248 // Use ^ instead of - because both gcc and llvm can remove the associated ^
249 // in the __builtin_clz intrinsic on x86.
250 return countLeadingZeros(Val, ZB_Undefined) ^
251 (std::numeric_limits<T>::digits - 1);
252}
253
254/// Macro compressed bit reversal table for 256 bits.
255///
256/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
257static const unsigned char BitReverseTable256[256] = {
258#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
259#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
260#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
261 R6(0), R6(2), R6(1), R6(3)
262#undef R2
263#undef R4
264#undef R6
265};
266
267/// Reverse the bits in \p Val.
268template <typename T>
269T reverseBits(T Val) {
270 unsigned char in[sizeof(Val)];
271 unsigned char out[sizeof(Val)];
272 std::memcpy(in, &Val, sizeof(Val));
273 for (unsigned i = 0; i < sizeof(Val); ++i)
274 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
275 std::memcpy(&Val, out, sizeof(Val));
276 return Val;
277}
278
279// NOTE: The following support functions use the _32/_64 extensions instead of
280// type overloading so that signed and unsigned integers can be used without
281// ambiguity.
282
283/// Return the high 32 bits of a 64 bit value.
284constexpr inline uint32_t Hi_32(uint64_t Value) {
285 return static_cast<uint32_t>(Value >> 32);
286}
287
288/// Return the low 32 bits of a 64 bit value.
289constexpr inline uint32_t Lo_32(uint64_t Value) {
290 return static_cast<uint32_t>(Value);
291}
292
293/// Make a 64-bit integer from a high / low pair of 32-bit integers.
294constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
295 return ((uint64_t)High << 32) | (uint64_t)Low;
296}
297
298/// Checks if an integer fits into the given bit width.
299template <unsigned N> constexpr inline bool isInt(int64_t x) {
300 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
301}
302// Template specializations to get better code for common cases.
303template <> constexpr inline bool isInt<8>(int64_t x) {
304 return static_cast<int8_t>(x) == x;
305}
306template <> constexpr inline bool isInt<16>(int64_t x) {
307 return static_cast<int16_t>(x) == x;
308}
309template <> constexpr inline bool isInt<32>(int64_t x) {
310 return static_cast<int32_t>(x) == x;
311}
312
313/// Checks if a signed integer is an N bit number shifted left by S.
314template <unsigned N, unsigned S>
315constexpr inline bool isShiftedInt(int64_t x) {
316 static_assert(
317 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
318 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
319 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
320}
321
322/// Checks if an unsigned integer fits into the given bit width.
323///
324/// This is written as two functions rather than as simply
325///
326/// return N >= 64 || X < (UINT64_C(1) << N);
327///
328/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
329/// left too many places.
330template <unsigned N>
331constexpr inline typename std::enable_if<(N < 64), bool>::type
332isUInt(uint64_t X) {
333 static_assert(N > 0, "isUInt<0> doesn't make sense");
334 return X < (UINT64_C(1) << (N));
335}
336template <unsigned N>
337constexpr inline typename std::enable_if<N >= 64, bool>::type
338isUInt(uint64_t X) {
339 return true;
340}
341
342// Template specializations to get better code for common cases.
343template <> constexpr inline bool isUInt<8>(uint64_t x) {
344 return static_cast<uint8_t>(x) == x;
345}
346template <> constexpr inline bool isUInt<16>(uint64_t x) {
347 return static_cast<uint16_t>(x) == x;
348}
349template <> constexpr inline bool isUInt<32>(uint64_t x) {
350 return static_cast<uint32_t>(x) == x;
351}
352
353/// Checks if an unsigned integer is an N bit number shifted left by S.
354template <unsigned N, unsigned S>
355constexpr inline bool isShiftedUInt(uint64_t x) {
356 static_assert(
357 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
358 static_assert(N + S <= 64,
359 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
360 // Per the two static_asserts above, S must be strictly less than 64. So
361 // 1 << S is not undefined behavior.
362 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
363}
364
365/// Gets the maximum value for a N-bit unsigned integer.
366inline uint64_t maxUIntN(uint64_t N) {
367 assert(N > 0 && N <= 64 && "integer width out of range");
368
369 // uint64_t(1) << 64 is undefined behavior, so we can't do
370 // (uint64_t(1) << N) - 1
371 // without checking first that N != 64. But this works and doesn't have a
372 // branch.
373 return UINT64_MAX >> (64 - N);
374}
375
376/// Gets the minimum value for a N-bit signed integer.
377inline int64_t minIntN(int64_t N) {
378 assert(N > 0 && N <= 64 && "integer width out of range");
379
380 return -(UINT64_C(1)<<(N-1));
381}
382
383/// Gets the maximum value for a N-bit signed integer.
384inline int64_t maxIntN(int64_t N) {
385 assert(N > 0 && N <= 64 && "integer width out of range");
386
387 // This relies on two's complement wraparound when N == 64, so we convert to
388 // int64_t only at the very end to avoid UB.
389 return (UINT64_C(1) << (N - 1)) - 1;
390}
391
392/// Checks if an unsigned integer fits into the given (dynamic) bit width.
393inline bool isUIntN(unsigned N, uint64_t x) {
394 return N >= 64 || x <= maxUIntN(N);
395}
396
397/// Checks if a signed integer fits into the given (dynamic) bit width.
398inline bool isIntN(unsigned N, int64_t x) {
399 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
400}
401
402/// Return true if the argument is a non-empty sequence of ones starting at the
403/// least significant bit with the remainder zero (32 bit version).
404/// Ex. isMask_32(0x0000FFFFU) == true.
405constexpr inline bool isMask_32(uint32_t Value) {
406 return Value && ((Value + 1) & Value) == 0;
407}
408
409/// Return true if the argument is a non-empty sequence of ones starting at the
410/// least significant bit with the remainder zero (64 bit version).
411constexpr inline bool isMask_64(uint64_t Value) {
412 return Value && ((Value + 1) & Value) == 0;
413}
414
415/// Return true if the argument contains a non-empty sequence of ones with the
416/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
417constexpr inline bool isShiftedMask_32(uint32_t Value) {
418 return Value && isMask_32((Value - 1) | Value);
419}
420
421/// Return true if the argument contains a non-empty sequence of ones with the
422/// remainder zero (64 bit version.)
423constexpr inline bool isShiftedMask_64(uint64_t Value) {
424 return Value && isMask_64((Value - 1) | Value);
425}
426
427/// Return true if the argument is a power of two > 0.
428/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
429constexpr inline bool isPowerOf2_32(uint32_t Value) {
430 return Value && !(Value & (Value - 1));
431}
432
433/// Return true if the argument is a power of two > 0 (64 bit edition.)
434constexpr inline bool isPowerOf2_64(uint64_t Value) {
435 return Value && !(Value & (Value - 1));
436}
437
438/// Return a byte-swapped representation of the 16-bit argument.
439inline uint16_t ByteSwap_16(uint16_t Value) {
440 return sys::SwapByteOrder_16(Value);
441}
442
443/// Return a byte-swapped representation of the 32-bit argument.
444inline uint32_t ByteSwap_32(uint32_t Value) {
445 return sys::SwapByteOrder_32(Value);
446}
447
448/// Return a byte-swapped representation of the 64-bit argument.
449inline uint64_t ByteSwap_64(uint64_t Value) {
450 return sys::SwapByteOrder_64(Value);
451}
452
453/// Count the number of ones from the most significant bit to the first
454/// zero bit.
455///
456/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
457/// Only unsigned integral types are allowed.
458///
459/// \param ZB the behavior on an input of all ones. Only ZB_Width and
460/// ZB_Undefined are valid arguments.
461template <typename T>
462std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
463 static_assert(std::numeric_limits<T>::is_integer &&
464 !std::numeric_limits<T>::is_signed,
465 "Only unsigned integral types are allowed.");
466 return countLeadingZeros<T>(~Value, ZB);
467}
468
469/// Count the number of ones from the least significant bit to the first
470/// zero bit.
471///
472/// Ex. countTrailingOnes(0x00FF00FF) == 8.
473/// Only unsigned integral types are allowed.
474///
475/// \param ZB the behavior on an input of all ones. Only ZB_Width and
476/// ZB_Undefined are valid arguments.
477template <typename T>
478std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
479 static_assert(std::numeric_limits<T>::is_integer &&
480 !std::numeric_limits<T>::is_signed,
481 "Only unsigned integral types are allowed.");
482 return countTrailingZeros<T>(~Value, ZB);
483}
484
485namespace detail {
486template <typename T, std::size_t SizeOfT> struct PopulationCounter {
487 static unsigned count(T Value) {
488 // Generic version, forward to 32 bits.
489 static_assert(SizeOfT <= 4, "Not implemented!");
490#if __GNUC__ >= 4
491 return __builtin_popcount(Value);
492#else
493 uint32_t v = Value;
494 v = v - ((v >> 1) & 0x55555555);
495 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
496 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
497#endif
498 }
499};
500
501template <typename T> struct PopulationCounter<T, 8> {
502 static unsigned count(T Value) {
503#if __GNUC__ >= 4
504 return __builtin_popcountll(Value);
505#else
506 uint64_t v = Value;
507 v = v - ((v >> 1) & 0x5555555555555555ULL);
508 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
509 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
510 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
511#endif
512 }
513};
514} // namespace detail
515
516/// Count the number of set bits in a value.
517/// Ex. countPopulation(0xF000F000) = 8
518/// Returns 0 if the word is zero.
519template <typename T>
520inline unsigned countPopulation(T Value) {
521 static_assert(std::numeric_limits<T>::is_integer &&
522 !std::numeric_limits<T>::is_signed,
523 "Only unsigned integral types are allowed.");
524 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
525}
526
527/// Return the log base 2 of the specified value.
528inline double Log2(double Value) {
529#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
530 return __builtin_log(Value) / __builtin_log(2.0);
531#else
532 return log2(Value);
533#endif
534}
535
536/// Return the floor log base 2 of the specified value, -1 if the value is zero.
537/// (32 bit edition.)
538/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
539inline unsigned Log2_32(uint32_t Value) {
540 return 31 - countLeadingZeros(Value);
541}
542
543/// Return the floor log base 2 of the specified value, -1 if the value is zero.
544/// (64 bit edition.)
545inline unsigned Log2_64(uint64_t Value) {
546 return 63 - countLeadingZeros(Value);
547}
548
549/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
550/// (32 bit edition).
551/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
552inline unsigned Log2_32_Ceil(uint32_t Value) {
553 return 32 - countLeadingZeros(Value - 1);
554}
555
556/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
557/// (64 bit edition.)
558inline unsigned Log2_64_Ceil(uint64_t Value) {
559 return 64 - countLeadingZeros(Value - 1);
560}
561
562/// Return the greatest common divisor of the values using Euclid's algorithm.
563inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
564 while (B) {
565 uint64_t T = B;
566 B = A % B;
567 A = T;
568 }
569 return A;
570}
571
572/// This function takes a 64-bit integer and returns the bit equivalent double.
573inline double BitsToDouble(uint64_t Bits) {
574 double D;
575 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
576 memcpy(&D, &Bits, sizeof(Bits));
577 return D;
578}
579
580/// This function takes a 32-bit integer and returns the bit equivalent float.
581inline float BitsToFloat(uint32_t Bits) {
582 float F;
583 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
584 memcpy(&F, &Bits, sizeof(Bits));
585 return F;
586}
587
588/// This function takes a double and returns the bit equivalent 64-bit integer.
589/// Note that copying doubles around changes the bits of NaNs on some hosts,
590/// notably x86, so this routine cannot be used if these bits are needed.
591inline uint64_t DoubleToBits(double Double) {
592 uint64_t Bits;
593 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
594 memcpy(&Bits, &Double, sizeof(Double));
595 return Bits;
596}
597
598/// This function takes a float and returns the bit equivalent 32-bit integer.
599/// Note that copying floats around changes the bits of NaNs on some hosts,
600/// notably x86, so this routine cannot be used if these bits are needed.
601inline uint32_t FloatToBits(float Float) {
602 uint32_t Bits;
603 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
604 memcpy(&Bits, &Float, sizeof(Float));
605 return Bits;
606}
607
608/// A and B are either alignments or offsets. Return the minimum alignment that
609/// may be assumed after adding the two together.
610constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
611 // The largest power of 2 that divides both A and B.
612 //
613 // Replace "-Value" by "1+~Value" in the following commented code to avoid
614 // MSVC warning C4146
615 // return (A | B) & -(A | B);
616 return (A | B) & (1 + ~(A | B));
617}
618
619/// Aligns \c Addr to \c Alignment bytes, rounding up.
620///
621/// Alignment should be a power of two. This method rounds up, so
622/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
623inline uintptr_t alignAddr(const void *Addr, size_t Alignment) {
624 assert(Alignment && isPowerOf2_64((uint64_t)Alignment) &&
625        "Alignment is not a power of two!");
626
627 assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr);
628
629 return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1));
630}
631
632/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment
633/// bytes, rounding up.
634inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) {
635 return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
636}
637
638/// Returns the next power of two (in 64-bits) that is strictly greater than A.
639/// Returns zero on overflow.
640inline uint64_t NextPowerOf2(uint64_t A) {
641 A |= (A >> 1);
642 A |= (A >> 2);
643 A |= (A >> 4);
644 A |= (A >> 8);
645 A |= (A >> 16);
646 A |= (A >> 32);
647 return A + 1;
648}
649
650/// Returns the power of two which is less than or equal to the given value.
651/// Essentially, it is a floor operation across the domain of powers of two.
652inline uint64_t PowerOf2Floor(uint64_t A) {
653 if (!A) return 0;
654 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
655}
656
657/// Returns the power of two which is greater than or equal to the given value.
658/// Essentially, it is a ceil operation across the domain of powers of two.
659inline uint64_t PowerOf2Ceil(uint64_t A) {
660 if (!A)
661 return 0;
662 return NextPowerOf2(A - 1);
663}
664
665/// Returns the next integer (mod 2**64) that is greater than or equal to
666/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
667///
668/// If non-zero \p Skew is specified, the return value will be a minimal
669/// integer that is greater than or equal to \p Value and equal to
670/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
671/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
672///
673/// Examples:
674/// \code
675/// alignTo(5, 8) = 8
676/// alignTo(17, 8) = 24
677/// alignTo(~0LL, 8) = 0
678/// alignTo(321, 255) = 510
679///
680/// alignTo(5, 8, 7) = 7
681/// alignTo(17, 8, 1) = 17
682/// alignTo(~0LL, 8, 3) = 3
683/// alignTo(321, 255, 42) = 552
684/// \endcode
685inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
686 assert(Align != 0u && "Align can't be 0.");
687 Skew %= Align;
688 return (Value + Align - 1 - Skew) / Align * Align + Skew;
689}
690
691/// Returns the next integer (mod 2**64) that is greater than or equal to
692/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
693template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
694 static_assert(Align != 0u, "Align must be non-zero");
695 return (Value + Align - 1) / Align * Align;
696}
697
698/// Returns the integer ceil(Numerator / Denominator).
699inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
700 return alignTo(Numerator, Denominator) / Denominator;
701}
702
703/// \c alignTo for contexts where a constant expression is required.
704/// \sa alignTo
705///
706/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
707template <uint64_t Align>
708struct AlignTo {
709 static_assert(Align != 0u, "Align must be non-zero");
710 template <uint64_t Value>
711 struct from_value {
712 static const uint64_t value = (Value + Align - 1) / Align * Align;
713 };
714};
715
716/// Returns the largest uint64_t less than or equal to \p Value and is
717/// \p Skew mod \p Align. \p Align must be non-zero
718inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
719 assert(Align != 0u && "Align can't be 0.");
720 Skew %= Align;
721 return (Value - Skew) / Align * Align + Skew;
722}
723
724/// Returns the offset to the next integer (mod 2**64) that is greater than
725/// or equal to \p Value and is a multiple of \p Align. \p Align must be
726/// non-zero.
727inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
728 return alignTo(Value, Align) - Value;
729}
730
731/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
732/// Requires 0 < B <= 32.
733template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
734 static_assert(B > 0, "Bit width can't be 0.");
735 static_assert(B <= 32, "Bit width out of range.");
736 return int32_t(X << (32 - B)) >> (32 - B);
737}
738
739/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
740/// Requires 0 < B <= 32.
741inline int32_t SignExtend32(uint32_t X, unsigned B) {
742 assert(B > 0 && "Bit width can't be 0.");
743 assert(B <= 32 && "Bit width out of range.");
744 return int32_t(X << (32 - B)) >> (32 - B);
745}
746
747/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
748/// Requires 0 < B <= 64.
749template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
750 static_assert(B > 0, "Bit width can't be 0.");
751 static_assert(B <= 64, "Bit width out of range.");
752 return int64_t(x << (64 - B)) >> (64 - B);
753}
754
755/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
756/// Requires 0 < B <= 64.
757inline int64_t SignExtend64(uint64_t X, unsigned B) {
758 assert(B > 0 && "Bit width can't be 0.");
759 assert(B <= 64 && "Bit width out of range.");
760 return int64_t(X << (64 - B)) >> (64 - B);
761}
762
763/// Subtract two unsigned integers, X and Y, of type T and return the absolute
764/// value of the result.
765template <typename T>
766typename std::enable_if<std::is_unsigned<T>::value, T>::type
767AbsoluteDifference(T X, T Y) {
768 return std::max(X, Y) - std::min(X, Y);
769}
770
771/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
772/// maximum representable value of T on overflow. ResultOverflowed indicates if
773/// the result is larger than the maximum representable value of type T.
774template <typename T>
775typename std::enable_if<std::is_unsigned<T>::value, T>::type
776SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
777 bool Dummy;
778 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
779 // Hacker's Delight, p. 29
780 T Z = X + Y;
781 Overflowed = (Z < X || Z < Y);
782 if (Overflowed)
783 return std::numeric_limits<T>::max();
784 else
785 return Z;
786}
787
788/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
789/// maximum representable value of T on overflow. ResultOverflowed indicates if
790/// the result is larger than the maximum representable value of type T.
791template <typename T>
792typename std::enable_if<std::is_unsigned<T>::value, T>::type
793SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
794 bool Dummy;
795 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
796
797 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
798 // because it fails for uint16_t (where multiplication can have undefined
799 // behavior due to promotion to int), and requires a division in addition
800 // to the multiplication.
801
802 Overflowed = false;
803
804 // Log2(Z) would be either Log2Z or Log2Z + 1.
805 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
806 // will necessarily be less than Log2Max as desired.
807 int Log2Z = Log2_64(X) + Log2_64(Y);
808 const T Max = std::numeric_limits<T>::max();
809 int Log2Max = Log2_64(Max);
810 if (Log2Z < Log2Max) {
811 return X * Y;
812 }
813 if (Log2Z > Log2Max) {
814 Overflowed = true;
815 return Max;
816 }
817
818 // We're going to use the top bit, and maybe overflow one
819 // bit past it. Multiply all but the bottom bit then add
820 // that on at the end.
821 T Z = (X >> 1) * Y;
822 if (Z & ~(Max >> 1)) {
823 Overflowed = true;
824 return Max;
825 }
826 Z <<= 1;
827 if (X & 1)
828 return SaturatingAdd(Z, Y, ResultOverflowed);
829
830 return Z;
831}
832
833/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
834/// the product. Clamp the result to the maximum representable value of T on
835/// overflow. ResultOverflowed indicates if the result is larger than the
836/// maximum representable value of type T.
837template <typename T>
838typename std::enable_if<std::is_unsigned<T>::value, T>::type
839SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
840 bool Dummy;
841 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
842
843 T Product = SaturatingMultiply(X, Y, &Overflowed);
844 if (Overflowed)
845 return Product;
846
847 return SaturatingAdd(A, Product, &Overflowed);
848}
849
850/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
851extern const float huge_valf;
852} // End llvm namespace
853
854#endif