Bug Summary

File: lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
Warning: line 221, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
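
In short: isThumbImmShiftedVal (line 221) gets its shift amount from getThumbImmValShift (line 214), which falls through to countTrailingZeros. llvm::countTrailingZeros<unsigned> returns 32 for a zero argument, and the analyzer carries that value into `~255U << getThumbImmValShift(V)`, a left shift by 32 on a 32-bit unsigned int, which is undefined behaviour. On this path the earlier `(Imm & ~255U) == 0` check (line 211) already rules out Imm == 0, so the value 32 may not be reachable in practice; the analyzer simply does not correlate the two facts. Below is a minimal standalone sketch of the flagged pattern, using simplified stand-ins for the LLVM helpers (ctz32 and the *Sketch functions are hypothetical names, not the upstream code), and a `Shift >= 32` guard shown only as one possible way to make the shift amount provably in range, not as the upstream fix.

#include <cstdint>
#include <iostream>

// Stand-in for llvm::countTrailingZeros<unsigned>: returns 32 for a zero
// argument, which is the value the analyzer propagates into the shift below.
static unsigned ctz32(unsigned V) {
  if (V == 0)
    return 32;
  unsigned N = 0;
  while ((V & 1u) == 0) {
    V >>= 1;
    ++N;
  }
  return N;
}

// Sketch of getThumbImmValShift: small immediates need no shift, otherwise
// count trailing zeros (32 only if the argument were 0, excluded above).
static unsigned getThumbImmValShiftSketch(unsigned Imm) {
  if ((Imm & ~255u) == 0)
    return 0;
  return ctz32(Imm);
}

// Sketch of isThumbImmShiftedVal with a defensive guard (an assumption, not
// the upstream fix): refuse shift amounts >= 32 so the left shift is defined.
static bool isThumbImmShiftedValSketch(unsigned V) {
  unsigned Shift = getThumbImmValShiftSketch(V);
  if (Shift >= 32)
    return false;
  V = (~255u << Shift) & V;
  return V == 0;
}

int main() {
  std::cout << isThumbImmShiftedValSketch(0x1F00) << '\n'; // 1: 0x1F shifted left by 8
  std::cout << isThumbImmShiftedValSketch(0x1F01) << '\n'; // 0: not a shifted 8-bit value
}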

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn358520/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-9~svn358520/lib/Target/ARM -I /build/llvm-toolchain-snapshot-9~svn358520/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn358520/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn358520/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn358520=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-04-17-050842-1547-1 -x c++ /build/llvm-toolchain-snapshot-9~svn358520/lib/Target/ARM/ARMTargetTransformInfo.cpp -faddrsig

/build/llvm-toolchain-snapshot-9~svn358520/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ARMTargetTransformInfo.h"
10#include "ARMSubtarget.h"
11#include "MCTargetDesc/ARMAddressingModes.h"
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/LoopInfo.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/ISDOpcodes.h"
17#include "llvm/CodeGen/ValueTypes.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/CallSite.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/DerivedTypes.h"
22#include "llvm/IR/Instruction.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/Type.h"
25#include "llvm/MC/SubtargetFeature.h"
26#include "llvm/Support/Casting.h"
27#include "llvm/Support/MachineValueType.h"
28#include "llvm/Target/TargetMachine.h"
29#include <algorithm>
30#include <cassert>
31#include <cstdint>
32#include <utility>
33
34using namespace llvm;
35
36#define DEBUG_TYPE "armtti"
37
38bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
39 const Function *Callee) const {
40 const TargetMachine &TM = getTLI()->getTargetMachine();
41 const FeatureBitset &CallerBits =
42 TM.getSubtargetImpl(*Caller)->getFeatureBits();
43 const FeatureBitset &CalleeBits =
44 TM.getSubtargetImpl(*Callee)->getFeatureBits();
45
46 // To inline a callee, all features not in the whitelist must match exactly.
47 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
48 (CalleeBits & ~InlineFeatureWhitelist);
49 // For features in the whitelist, the callee's features must be a subset of
50 // the callers'.
51 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
52 (CalleeBits & InlineFeatureWhitelist);
53 return MatchExact && MatchSubset;
54}
55
56int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
57 assert(Ty->isIntegerTy());
9
'?' condition is true
58
59 unsigned Bits = Ty->getPrimitiveSizeInBits();
60 if (Bits == 0 || Imm.getActiveBits() >= 64)
10
Assuming 'Bits' is not equal to 0
11
Assuming the condition is false
12
Taking false branch
61 return 4;
62
63 int64_t SImmVal = Imm.getSExtValue();
64 uint64_t ZImmVal = Imm.getZExtValue();
65 if (!ST->isThumb()) {
13
Taking false branch
66 if ((SImmVal >= 0 && SImmVal < 65536) ||
67 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
68 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
69 return 1;
70 return ST->hasV6T2Ops() ? 2 : 3;
71 }
72 if (ST->isThumb2()) {
14
Taking false branch
73 if ((SImmVal >= 0 && SImmVal < 65536) ||
74 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
75 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
76 return 1;
77 return ST->hasV6T2Ops() ? 2 : 3;
78 }
79 // Thumb1, any i8 imm cost 1.
80 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
15
Assuming 'Bits' is not equal to 8
16
Assuming 'SImmVal' is < 0
81 return 1;
82 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
17
Assuming the condition is false
18
Calling 'isThumbImmShiftedVal'
83 return 2;
84 // Load from constantpool.
85 return 3;
86}
87
88// Constants smaller than 256 fit in the immediate field of
89// Thumb1 instructions so we return a zero cost and 1 otherwise.
90int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
91 const APInt &Imm, Type *Ty) {
92 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
93 return 0;
94
95 return 1;
96}
97
98int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
99 Type *Ty) {
100 // Division by a constant can be turned into multiplication, but only if we
101 // know it's constant. So it's not so much that the immediate is cheap (it's
102 // not), but that the alternative is worse.
103 // FIXME: this is probably unneeded with GlobalISel.
104 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1
Assuming 'Opcode' is not equal to SDiv
2
Assuming 'Opcode' is not equal to UDiv
105 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
3
Assuming 'Opcode' is not equal to SRem
4
Assuming 'Opcode' is not equal to URem
106 Idx == 1)
107 return 0;
108
109 if (Opcode == Instruction::And) {
5
Assuming 'Opcode' is equal to And
6
Taking true branch
110 // UXTB/UXTH
111 if (Imm == 255 || Imm == 65535)
7
Taking false branch
112 return 0;
113 // Conversion to BIC is free, and means we can use ~Imm instead.
114 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
8
Calling 'ARMTTIImpl::getIntImmCost'
115 }
116
117 if (Opcode == Instruction::Add)
118 // Conversion to SUB is free, and means we can use -Imm instead.
119 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
120
121 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
122 Ty->getIntegerBitWidth() == 32) {
123 int64_t NegImm = -Imm.getSExtValue();
124 if (ST->isThumb2() && NegImm < 1<<12)
125 // icmp X, #-C -> cmn X, #C
126 return 0;
127 if (ST->isThumb() && NegImm < 1<<8)
128 // icmp X, #-C -> adds X, #C
129 return 0;
130 }
131
132 // xor a, -1 can always be folded to MVN
133 if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
134 return 0;
135
136 return getIntImmCost(Imm, Ty);
137}
138
139int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
140 const Instruction *I) {
141 int ISD = TLI->InstructionOpcodeToISD(Opcode);
142 assert(ISD && "Invalid opcode");
143
144 // Single to/from double precision conversions.
145 static const CostTblEntry NEONFltDblTbl[] = {
146 // Vector fptrunc/fpext conversions.
147 { ISD::FP_ROUND, MVT::v2f64, 2 },
148 { ISD::FP_EXTEND, MVT::v2f32, 2 },
149 { ISD::FP_EXTEND, MVT::v4f32, 4 }
150 };
151
152 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
153 ISD == ISD::FP_EXTEND)) {
154 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
155 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
156 return LT.first * Entry->Cost;
157 }
158
159 EVT SrcTy = TLI->getValueType(DL, Src);
160 EVT DstTy = TLI->getValueType(DL, Dst);
161
162 if (!SrcTy.isSimple() || !DstTy.isSimple())
163 return BaseT::getCastInstrCost(Opcode, Dst, Src);
164
165 // Some arithmetic, load and store operations have specific instructions
166 // to cast up/down their types automatically at no extra cost.
167 // TODO: Get these tables to know at least what the related operations are.
168 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
169 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
170 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
171 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
172 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
173 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
174 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
175
176 // The number of vmovl instructions for the extension.
177 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
178 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
179 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
180 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
181 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
182 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
183 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
184 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
185 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
186 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
187
188 // Operations that we legalize using splitting.
189 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
190 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
191
192 // Vector float <-> i32 conversions.
193 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
194 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
195
196 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
197 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
198 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
199 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
200 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
201 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
202 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
203 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
204 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
205 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
206 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
207 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
208 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
209 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
210 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
211 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
212 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
213 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
214 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
215 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
216
217 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
218 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
219 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
220 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
221 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
222 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
223
224 // Vector double <-> i32 conversions.
225 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
226 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
227
228 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
229 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
230 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
231 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
232 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
233 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
234
235 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
236 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
237 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
238 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
239 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
240 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
241 };
242
243 if (SrcTy.isVector() && ST->hasNEON()) {
244 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
245 DstTy.getSimpleVT(),
246 SrcTy.getSimpleVT()))
247 return Entry->Cost;
248 }
249
250 // Scalar float to integer conversions.
251 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
252 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
253 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
254 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
255 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
256 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
257 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
258 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
259 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
260 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
261 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
262 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
263 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
264 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
265 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
266 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
267 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
268 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
269 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
270 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
271 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
272 };
273 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
274 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
275 DstTy.getSimpleVT(),
276 SrcTy.getSimpleVT()))
277 return Entry->Cost;
278 }
279
280 // Scalar integer to float conversions.
281 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
282 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
283 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
284 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
285 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
286 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
287 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
288 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
289 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
290 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
291 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
292 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
293 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
294 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
295 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
296 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
297 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
298 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
299 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
300 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
301 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
302 };
303
304 if (SrcTy.isInteger() && ST->hasNEON()) {
305 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
306 ISD, DstTy.getSimpleVT(),
307 SrcTy.getSimpleVT()))
308 return Entry->Cost;
309 }
310
311 // Scalar integer conversion costs.
312 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
313 // i16 -> i64 requires two dependent operations.
314 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
315
316 // Truncates on i64 are assumed to be free.
317 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
318 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
319 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
320 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
321 };
322
323 if (SrcTy.isInteger()) {
324 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
325 DstTy.getSimpleVT(),
326 SrcTy.getSimpleVT()))
327 return Entry->Cost;
328 }
329
330 return BaseT::getCastInstrCost(Opcode, Dst, Src);
331}
332
333int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
334 unsigned Index) {
335 // Penalize inserting into an D-subregister. We end up with a three times
336 // lower estimated throughput on swift.
337 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
338 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
339 return 3;
340
341 if ((Opcode == Instruction::InsertElement ||
342 Opcode == Instruction::ExtractElement)) {
343 // Cross-class copies are expensive on many microarchitectures,
344 // so assume they are expensive by default.
345 if (ValTy->getVectorElementType()->isIntegerTy())
346 return 3;
347
348 // Even if it's not a cross class copy, this likely leads to mixing
349 // of NEON and VFP code and should be therefore penalized.
350 if (ValTy->isVectorTy() &&
351 ValTy->getScalarSizeInBits() <= 32)
352 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
353 }
354
355 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
356}
357
358int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
359 const Instruction *I) {
360 int ISD = TLI->InstructionOpcodeToISD(Opcode);
361 // On NEON a vector select gets lowered to vbsl.
362 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
363 // Lowering of some vector selects is currently far from perfect.
364 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
365 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
366 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
367 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
368 };
369
370 EVT SelCondTy = TLI->getValueType(DL, CondTy);
371 EVT SelValTy = TLI->getValueType(DL, ValTy);
372 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
373 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
374 SelCondTy.getSimpleVT(),
375 SelValTy.getSimpleVT()))
376 return Entry->Cost;
377 }
378
379 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
380 return LT.first;
381 }
382
383 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
384}
385
386int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
387 const SCEV *Ptr) {
388 // Address computations in vectorized code with non-consecutive addresses will
389 // likely result in more instructions compared to scalar code where the
390 // computation can more often be merged into the index mode. The resulting
391 // extra micro-ops can significantly decrease throughput.
392 unsigned NumVectorInstToHideOverhead = 10;
393 int MaxMergeDistance = 64;
394
395 if (Ty->isVectorTy() && SE &&
396 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
397 return NumVectorInstToHideOverhead;
398
399 // In many cases the address computation is not merged into the instruction
400 // addressing mode.
401 return 1;
402}
403
404int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
405 Type *SubTp) {
406 if (Kind == TTI::SK_Broadcast) {
407 static const CostTblEntry NEONDupTbl[] = {
408 // VDUP handles these cases.
409 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
410 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
411 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
412 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
413 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
414 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
415
416 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
417 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
418 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
419 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
420
421 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
422
423 if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
424 LT.second))
425 return LT.first * Entry->Cost;
426
427 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
428 }
429 if (Kind == TTI::SK_Reverse) {
430 static const CostTblEntry NEONShuffleTbl[] = {
431 // Reverse shuffle cost one instruction if we are shuffling within a
432 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
433 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
434 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
435 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
436 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
437 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
438 {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
439
440 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
441 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
442 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
443 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
444
445 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
446
447 if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
448 LT.second))
449 return LT.first * Entry->Cost;
450
451 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
452 }
453 if (Kind == TTI::SK_Select) {
454 static const CostTblEntry NEONSelShuffleTbl[] = {
455 // Select shuffle cost table for ARM. Cost is the number of instructions
456 // required to create the shuffled vector.
457
458 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
459 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
460 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
461 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
462
463 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
464 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
465 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
466
467 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
468
469 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
470
471 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
472 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
473 ISD::VECTOR_SHUFFLE, LT.second))
474 return LT.first * Entry->Cost;
475 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
476 }
477 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
478}
479
480int ARMTTIImpl::getArithmeticInstrCost(
481 unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
482 TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
483 TTI::OperandValueProperties Opd2PropInfo,
484 ArrayRef<const Value *> Args) {
485 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
486 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
487
488 const unsigned FunctionCallDivCost = 20;
489 const unsigned ReciprocalDivCost = 10;
490 static const CostTblEntry CostTbl[] = {
491 // Division.
492 // These costs are somewhat random. Choose a cost of 20 to indicate that
493 // vectorizing devision (added function call) is going to be very expensive.
494 // Double registers types.
495 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
496 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
497 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
498 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
499 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
500 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
501 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
502 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
503 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
504 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
505 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
506 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
507 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
508 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
509 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
510 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
511 // Quad register types.
512 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
513 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
514 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
515 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
516 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
517 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
518 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
519 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
520 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
521 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
522 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
523 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
524 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
525 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
526 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
527 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
528 // Multiplication.
529 };
530
531 if (ST->hasNEON())
532 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
533 return LT.first * Entry->Cost;
534
535 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
536 Opd1PropInfo, Opd2PropInfo);
537
538 // This is somewhat of a hack. The problem that we are facing is that SROA
539 // creates a sequence of shift, and, or instructions to construct values.
540 // These sequences are recognized by the ISel and have zero-cost. Not so for
541 // the vectorized code. Because we have support for v2i64 but not i64 those
542 // sequences look particularly beneficial to vectorize.
543 // To work around this we increase the cost of v2i64 operations to make them
544 // seem less beneficial.
545 if (LT.second == MVT::v2i64 &&
546 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
547 Cost += 4;
548
549 return Cost;
550}
551
552int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
553 unsigned AddressSpace, const Instruction *I) {
554 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
555
556 if (Src->isVectorTy() && Alignment != 16 &&
557 Src->getVectorElementType()->isDoubleTy()) {
558 // Unaligned loads/stores are extremely inefficient.
559 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
560 return LT.first * 4;
561 }
562 return LT.first;
563}
564
565int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
566 unsigned Factor,
567 ArrayRef<unsigned> Indices,
568 unsigned Alignment,
569 unsigned AddressSpace,
570 bool UseMaskForCond,
571 bool UseMaskForGaps) {
572 assert(Factor >= 2 && "Invalid interleave factor");
573 assert(isa<VectorType>(VecTy) && "Expect a vector type");
574
575 // vldN/vstN doesn't support vector types of i64/f64 element.
576 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
577
578 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
579 !UseMaskForCond && !UseMaskForGaps) {
580 unsigned NumElts = VecTy->getVectorNumElements();
581 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
582
583 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
584 // Accesses having vector types that are a multiple of 128 bits can be
585 // matched to more than one vldN/vstN instruction.
586 if (NumElts % Factor == 0 &&
587 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
588 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
589 }
590
591 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
592 Alignment, AddressSpace,
593 UseMaskForCond, UseMaskForGaps);
594}
595
596void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
597 TTI::UnrollingPreferences &UP) {
598 // Only currently enable these preferences for M-Class cores.
599 if (!ST->isMClass())
600 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
601
602 // Disable loop unrolling for Oz and Os.
603 UP.OptSizeThreshold = 0;
604 UP.PartialOptSizeThreshold = 0;
605 if (L->getHeader()->getParent()->hasOptSize())
606 return;
607
608 // Only enable on Thumb-2 targets.
609 if (!ST->isThumb2())
610 return;
611
612 SmallVector<BasicBlock*, 4> ExitingBlocks;
613 L->getExitingBlocks(ExitingBlocks);
614 LLVM_DEBUG(dbgs() << "Loop has:\n"
615 << "Blocks: " << L->getNumBlocks() << "\n"
616 << "Exit blocks: " << ExitingBlocks.size() << "\n");
617
618 // Only allow another exit other than the latch. This acts as an early exit
619 // as it mirrors the profitability calculation of the runtime unroller.
620 if (ExitingBlocks.size() > 2)
621 return;
622
623 // Limit the CFG of the loop body for targets with a branch predictor.
624 // Allowing 4 blocks permits if-then-else diamonds in the body.
625 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
626 return;
627
628 // Scan the loop: don't unroll loops with calls as this could prevent
629 // inlining.
630 unsigned Cost = 0;
631 for (auto *BB : L->getBlocks()) {
632 for (auto &I : *BB) {
633 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
634 ImmutableCallSite CS(&I);
635 if (const Function *F = CS.getCalledFunction()) {
636 if (!isLoweredToCall(F))
637 continue;
638 }
639 return;
640 }
641 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
642 I.value_op_end());
643 Cost += getUserCost(&I, Operands);
644 }
645 }
646
647 LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
648
649 UP.Partial = true;
650 UP.Runtime = true;
651 UP.UnrollRemainder = true;
652 UP.DefaultUnrollRuntimeCount = 4;
653 UP.UnrollAndJam = true;
654 UP.UnrollAndJamInnerLoopThreshold = 60;
655
656 // Force unrolling small loops can be very useful because of the branch
657 // taken cost of the backedge.
658 if (Cost < 12)
659 UP.Force = true;
660}

/build/llvm-toolchain-snapshot-9~svn358520/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM addressing mode implementation stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
14#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15
16#include "llvm/ADT/APFloat.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/bit.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22
23namespace llvm {
24
25/// ARM_AM - ARM Addressing Mode Stuff
26namespace ARM_AM {
27 enum ShiftOpc {
28 no_shift = 0,
29 asr,
30 lsl,
31 lsr,
32 ror,
33 rrx
34 };
35
36 enum AddrOpc {
37 sub = 0,
38 add
39 };
40
41 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
42
43 inline const char *getShiftOpcStr(ShiftOpc Op) {
44 switch (Op) {
45 default: llvm_unreachable("Unknown shift opc!");
46 case ARM_AM::asr: return "asr";
47 case ARM_AM::lsl: return "lsl";
48 case ARM_AM::lsr: return "lsr";
49 case ARM_AM::ror: return "ror";
50 case ARM_AM::rrx: return "rrx";
51 }
52 }
53
54 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
55 switch (Op) {
56 default: llvm_unreachable("Unknown shift opc!");
57 case ARM_AM::asr: return 2;
58 case ARM_AM::lsl: return 0;
59 case ARM_AM::lsr: return 1;
60 case ARM_AM::ror: return 3;
61 }
62 }
63
64 enum AMSubMode {
65 bad_am_submode = 0,
66 ia,
67 ib,
68 da,
69 db
70 };
71
72 inline const char *getAMSubModeStr(AMSubMode Mode) {
73 switch (Mode) {
74 default: llvm_unreachable("Unknown addressing sub-mode!");
75 case ARM_AM::ia: return "ia";
76 case ARM_AM::ib: return "ib";
77 case ARM_AM::da: return "da";
78 case ARM_AM::db: return "db";
79 }
80 }
81
82 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
83 ///
84 inline unsigned rotr32(unsigned Val, unsigned Amt) {
85 assert(Amt < 32 && "Invalid rotate amount");
86 return (Val >> Amt) | (Val << ((32-Amt)&31));
87 }
88
89 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
90 ///
91 inline unsigned rotl32(unsigned Val, unsigned Amt) {
92 assert(Amt < 32 && "Invalid rotate amount");
93 return (Val << Amt) | (Val >> ((32-Amt)&31));
94 }
95
96 //===--------------------------------------------------------------------===//
97 // Addressing Mode #1: shift_operand with registers
98 //===--------------------------------------------------------------------===//
99 //
100 // This 'addressing mode' is used for arithmetic instructions. It can
101 // represent things like:
102 // reg
103 // reg [asr|lsl|lsr|ror|rrx] reg
104 // reg [asr|lsl|lsr|ror|rrx] imm
105 //
106 // This is stored three operands [rega, regb, opc]. The first is the base
107 // reg, the second is the shift amount (or reg0 if not present or imm). The
108 // third operand encodes the shift opcode and the imm if a reg isn't present.
109 //
110 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
111 return ShOp | (Imm << 3);
112 }
113 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
114 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
115
116 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
117 /// the 8-bit imm value.
118 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
119 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
120 /// the rotate amount.
121 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
122
123 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
124 /// computing the rotate amount to use. If this immediate value cannot be
125 /// handled with a single shifter-op, determine a good rotate amount that will
126 /// take a maximal chunk of bits out of the immediate.
127 inline unsigned getSOImmValRotate(unsigned Imm) {
128 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
129 // of zero.
130 if ((Imm & ~255U) == 0) return 0;
131
132 // Use CTZ to compute the rotate amount.
133 unsigned TZ = countTrailingZeros(Imm);
134
135 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
136 // not 9.
137 unsigned RotAmt = TZ & ~1;
138
139 // If we can handle this spread, return it.
140 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
141 return (32-RotAmt)&31; // HW rotates right, not left.
142
143 // For values like 0xF000000F, we should ignore the low 6 bits, then
144 // retry the hunt.
145 if (Imm & 63U) {
146 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
147 unsigned RotAmt2 = TZ2 & ~1;
148 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
149 return (32-RotAmt2)&31; // HW rotates right, not left.
150 }
151
152 // Otherwise, we have no way to cover this span of bits with a single
153 // shifter_op immediate. Return a chunk of bits that will be useful to
154 // handle.
155 return (32-RotAmt)&31; // HW rotates right, not left.
156 }
157
158 /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
159 /// into an shifter_operand immediate operand, return the 12-bit encoding for
160 /// it. If not, return -1.
161 inline int getSOImmVal(unsigned Arg) {
162 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
163 // of zero.
164 if ((Arg & ~255U) == 0) return Arg;
165
166 unsigned RotAmt = getSOImmValRotate(Arg);
167
168 // If this cannot be handled with a single shifter_op, bail out.
169 if (rotr32(~255U, RotAmt) & Arg)
170 return -1;
171
172 // Encode this correctly.
173 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
174 }
175
176 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
177 /// or'ing together two SOImmVal's.
178 inline bool isSOImmTwoPartVal(unsigned V) {
179 // If this can be handled with a single shifter_op, bail out.
180 V = rotr32(~255U, getSOImmValRotate(V)) & V;
181 if (V == 0)
182 return false;
183
184 // If this can be handled with two shifter_op's, accept.
185 V = rotr32(~255U, getSOImmValRotate(V)) & V;
186 return V == 0;
187 }
188
189 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
190 /// return the first chunk of it.
191 inline unsigned getSOImmTwoPartFirst(unsigned V) {
192 return rotr32(255U, getSOImmValRotate(V)) & V;
193 }
194
195 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
196 /// return the second chunk of it.
197 inline unsigned getSOImmTwoPartSecond(unsigned V) {
198 // Mask out the first hunk.
199 V = rotr32(~255U, getSOImmValRotate(V)) & V;
200
201 // Take what's left.
202 assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
203 return V;
204 }
205
206 /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
207 /// by a left shift. Returns the shift amount to use.
208 inline unsigned getThumbImmValShift(unsigned Imm) {
209 // 8-bit (or less) immediates are trivially immediate operand with a shift
210 // of zero.
211 if ((Imm & ~255U) == 0) return 0;
20
Assuming the condition is false
21
Taking false branch
212
213 // Use CTZ to compute the shift amount.
214 return countTrailingZeros(Imm);
22
Calling 'countTrailingZeros<unsigned int>'
29
Returning from 'countTrailingZeros<unsigned int>'
30
Returning the value 32
215 }
216
217 /// isThumbImmShiftedVal - Return true if the specified value can be obtained
218 /// by left shifting a 8-bit immediate.
219 inline bool isThumbImmShiftedVal(unsigned V) {
220 // If this can be handled with
221 V = (~255U << getThumbImmValShift(V)) & V;
19
Calling 'getThumbImmValShift'
31
Returning from 'getThumbImmValShift'
32
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
222 return V == 0;
223 }
224
225 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
226 /// by a left shift. Returns the shift amount to use.
227 inline unsigned getThumbImm16ValShift(unsigned Imm) {
228 // 16-bit (or less) immediates are trivially immediate operand with a shift
229 // of zero.
230 if ((Imm & ~65535U) == 0) return 0;
231
232 // Use CTZ to compute the shift amount.
233 return countTrailingZeros(Imm);
234 }
235
236 /// isThumbImm16ShiftedVal - Return true if the specified value can be
237 /// obtained by left shifting a 16-bit immediate.
238 inline bool isThumbImm16ShiftedVal(unsigned V) {
239 // If this can be handled with
240 V = (~65535U << getThumbImm16ValShift(V)) & V;
241 return V == 0;
242 }
243
244 /// getThumbImmNonShiftedVal - If V is a value that satisfies
245 /// isThumbImmShiftedVal, return the non-shiftd value.
246 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
247 return V >> getThumbImmValShift(V);
248 }
249
250
251 /// getT2SOImmValSplat - Return the 12-bit encoded representation
252 /// if the specified value can be obtained by splatting the low 8 bits
253 /// into every other byte or every byte of a 32-bit value. i.e.,
254 /// 00000000 00000000 00000000 abcdefgh control = 0
255 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
256 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
257 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
258 /// Return -1 if none of the above apply.
259 /// See ARM Reference Manual A6.3.2.
260 inline int getT2SOImmValSplatVal(unsigned V) {
261 unsigned u, Vs, Imm;
262 // control = 0
263 if ((V & 0xffffff00) == 0)
264 return V;
265
266 // If the value is zeroes in the first byte, just shift those off
267 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
268 // Any passing value only has 8 bits of payload, splatted across the word
269 Imm = Vs & 0xff;
270 // Likewise, any passing values have the payload splatted into the 3rd byte
271 u = Imm | (Imm << 16);
272
273 // control = 1 or 2
274 if (Vs == u)
275 return (((Vs == V) ? 1 : 2) << 8) | Imm;
276
277 // control = 3
278 if (Vs == (u | (u << 8)))
279 return (3 << 8) | Imm;
280
281 return -1;
282 }
283
284 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
285 /// specified value is a rotated 8-bit value. Return -1 if no rotation
286 /// encoding is possible.
287 /// See ARM Reference Manual A6.3.2.
288 inline int getT2SOImmValRotateVal(unsigned V) {
289 unsigned RotAmt = countLeadingZeros(V);
290 if (RotAmt >= 24)
291 return -1;
292
293 // If 'Arg' can be handled with a single shifter_op return the value.
294 if ((rotr32(0xff000000U, RotAmt) & V) == V)
295 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
296
297 return -1;
298 }
299
300 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
301 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
302 /// encoding for it. If not, return -1.
303 /// See ARM Reference Manual A6.3.2.
304 inline int getT2SOImmVal(unsigned Arg) {
305 // If 'Arg' is an 8-bit splat, then get the encoded value.
306 int Splat = getT2SOImmValSplatVal(Arg);
307 if (Splat != -1)
308 return Splat;
309
310 // If 'Arg' can be handled with a single shifter_op return the value.
311 int Rot = getT2SOImmValRotateVal(Arg);
312 if (Rot != -1)
313 return Rot;
314
315 return -1;
316 }
317
318 inline unsigned getT2SOImmValRotate(unsigned V) {
319 if ((V & ~255U) == 0) return 0;
320 // Use CTZ to compute the rotate amount.
321 unsigned RotAmt = countTrailingZeros(V);
322 return (32 - RotAmt) & 31;
323 }
324
325 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
326 unsigned V = Imm;
327 // Passing values can be any combination of splat values and shifter
328 // values. If this can be handled with a single shifter or splat, bail
329 // out. Those should be handled directly, not with a two-part val.
330 if (getT2SOImmValSplatVal(V) != -1)
331 return false;
332 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
333 if (V == 0)
334 return false;
335
336 // If this can be handled as an immediate, accept.
337 if (getT2SOImmVal(V) != -1) return true;
338
339 // Likewise, try masking out a splat value first.
340 V = Imm;
341 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
342 V &= ~0xff00ff00U;
343 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
344 V &= ~0x00ff00ffU;
345 // If what's left can be handled as an immediate, accept.
346 if (getT2SOImmVal(V) != -1) return true;
347
348 // Otherwise, do not accept.
349 return false;
350 }
351
352 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
353 assert (isT2SOImmTwoPartVal(Imm) &&
354 "Immedate cannot be encoded as two part immediate!");
355 // Try a shifter operand as one part
356 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
357 // If the rest is encodable as an immediate, then return it.
358 if (getT2SOImmVal(V) != -1) return V;
359
360 // Try masking out a splat value first.
361 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
362 return Imm & 0xff00ff00U;
363
364 // The other splat is all that's left as an option.
365 assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
366 return Imm & 0x00ff00ffU;
367 }
368
369 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
370 // Mask out the first hunk
371 Imm ^= getT2SOImmTwoPartFirst(Imm);
372 // Return what's left
373 assert (getT2SOImmVal(Imm) != -1 &&
374 "Unable to encode second part of T2 two part SO immediate");
375 return Imm;
376 }
377
378
379 //===--------------------------------------------------------------------===//
380 // Addressing Mode #2
381 //===--------------------------------------------------------------------===//
382 //
383 // This is used for most simple load/store instructions.
384 //
385 // addrmode2 := reg +/- reg shop imm
386 // addrmode2 := reg +/- imm12
387 //
388 // The first operand is always a Reg. The second operand is a reg if in
389 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
390 // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
391 // fourth operand 16-17 encodes the index mode.
392 //
393 // If this addressing mode is a frame index (before prolog/epilog insertion
394 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
395 // with no shift amount for the frame offset.
396 //
397 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
398 unsigned IdxMode = 0) {
399 assert(Imm12 < (1 << 12) && "Imm too large!");
400 bool isSub = Opc == sub;
401 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
402 }
403 inline unsigned getAM2Offset(unsigned AM2Opc) {
404 return AM2Opc & ((1 << 12)-1);
405 }
406 inline AddrOpc getAM2Op(unsigned AM2Opc) {
407 return ((AM2Opc >> 12) & 1) ? sub : add;
408 }
409 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
410 return (ShiftOpc)((AM2Opc >> 13) & 7);
411 }
412 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
413
414 //===--------------------------------------------------------------------===//
415 // Addressing Mode #3
416 //===--------------------------------------------------------------------===//
417 //
418 // This is used for sign-extending loads, and load/store-pair instructions.
419 //
420 // addrmode3 := reg +/- reg
421 // addrmode3 := reg +/- imm8
422 //
423 // The first operand is always a Reg. The second operand is a reg if in
424 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
425 // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
426 // index mode.
427
428 /// getAM3Opc - This function encodes the addrmode3 opc field.
429 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
430 unsigned IdxMode = 0) {
431 bool isSub = Opc == sub;
432 return ((int)isSub << 8) | Offset | (IdxMode << 9);
433 }
434 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
435 inline AddrOpc getAM3Op(unsigned AM3Opc) {
436 return ((AM3Opc >> 8) & 1) ? sub : add;
437 }
438 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
439
440 //===--------------------------------------------------------------------===//
441 // Addressing Mode #4
442 //===--------------------------------------------------------------------===//
443 //
444 // This is used for load / store multiple instructions.
445 //
446 // addrmode4 := reg, <mode>
447 //
448 // The four modes are:
449 // IA - Increment after
450 // IB - Increment before
451 // DA - Decrement after
452 // DB - Decrement before
453 // For VFP instructions, only the IA and DB modes are valid.
454
455 inline AMSubMode getAM4SubMode(unsigned Mode) {
456 return (AMSubMode)(Mode & 0x7);
457 }
458
459 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
460
461 //===--------------------------------------------------------------------===//
462 // Addressing Mode #5
463 //===--------------------------------------------------------------------===//
464 //
465 // This is used for coprocessor instructions, such as FP load/stores.
466 //
467 // addrmode5 := reg +/- imm8*4
468 //
469 // The first operand is always a Reg. The second operand encodes the
470 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
471
472 /// getAM5Opc - This function encodes the addrmode5 opc field.
473 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
474 bool isSub = Opc == sub;
475 return ((int)isSub << 8) | Offset;
476 }
477 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
478 inline AddrOpc getAM5Op(unsigned AM5Opc) {
479 return ((AM5Opc >> 8) & 1) ? sub : add;
480 }
481
482 //===--------------------------------------------------------------------===//
483 // Addressing Mode #5 FP16
484 //===--------------------------------------------------------------------===//
485 //
486 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
487 //
488 // addrmode5fp16 := reg +/- imm8*2
489 //
490 // The first operand is always a Reg. The second operand encodes the
491 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
492
493 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
494 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
495 bool isSub = Opc == sub;
496 return ((int)isSub << 8) | Offset;
497 }
498 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
499 return AM5Opc & 0xFF;
500 }
501 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
502 return ((AM5Opc >> 8) & 1) ? sub : add;
503 }
504
505 //===--------------------------------------------------------------------===//
506 // Addressing Mode #6
507 //===--------------------------------------------------------------------===//
508 //
509 // This is used for NEON load / store instructions.
510 //
511 // addrmode6 := reg with optional alignment
512 //
513 // This is stored in two operands [regaddr, align]. The first is the
514 // address register. The second operand is the value of the alignment
515 // specifier in bytes or zero if no explicit alignment.
516 // Valid alignments depend on the specific instruction.
517
518 //===--------------------------------------------------------------------===//
519 // NEON Modified Immediates
520 //===--------------------------------------------------------------------===//
521 //
522 // Several NEON instructions (e.g., VMOV) take a "modified immediate"
523 // vector operand, where a small immediate encoded in the instruction
524 // specifies a full NEON vector value. These modified immediates are
525 // represented here as encoded integers. The low 8 bits hold the immediate
526 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
527 // the "Cmode" field of the instruction. The interfaces below treat the
528 // Op and Cmode values as a single 5-bit value.
529
530 inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
531 return (OpCmode << 8) | Val;
532 }
533 inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
534 return (ModImm >> 8) & 0x1f;
535 }
536 inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; }
537
538 /// decodeNEONModImm - Decode a NEON modified immediate value into the
539 /// element value and the element size in bits. (If the element size is
540 /// smaller than the vector, it is splatted into all the elements.)
541 inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
542 unsigned OpCmode = getNEONModImmOpCmode(ModImm);
543 unsigned Imm8 = getNEONModImmVal(ModImm);
544 uint64_t Val = 0;
545
546 if (OpCmode == 0xe) {
547 // 8-bit vector elements
548 Val = Imm8;
549 EltBits = 8;
550 } else if ((OpCmode & 0xc) == 0x8) {
551 // 16-bit vector elements
552 unsigned ByteNum = (OpCmode & 0x6) >> 1;
553 Val = Imm8 << (8 * ByteNum);
554 EltBits = 16;
555 } else if ((OpCmode & 0x8) == 0) {
556 // 32-bit vector elements, zero with one byte set
557 unsigned ByteNum = (OpCmode & 0x6) >> 1;
558 Val = Imm8 << (8 * ByteNum);
559 EltBits = 32;
560 } else if ((OpCmode & 0xe) == 0xc) {
561 // 32-bit vector elements, one byte with low bits set
562 unsigned ByteNum = 1 + (OpCmode & 0x1);
563 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
564 EltBits = 32;
565 } else if (OpCmode == 0x1e) {
566 // 64-bit vector elements
567 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
568 if ((ModImm >> ByteNum) & 1)
569 Val |= (uint64_t)0xff << (8 * ByteNum);
570 }
571 EltBits = 64;
572 } else {
573 llvm_unreachable("Unsupported NEON immediate");
574 }
575 return Val;
576 }
577
578 // Generic validation for single-byte immediate (0X00, 00X0, etc).
579 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
580 assert(Size >= 1 && Size <= 4 && "Invalid size");
581 unsigned count = 0;
582 for (unsigned i = 0; i < Size; ++i) {
583 if (Value & 0xff) count++;
584 Value >>= 8;
585 }
586 return count == 1;
587 }
588
589 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
590 inline bool isNEONi16splat(unsigned Value) {
591 if (Value > 0xffff)
592 return false;
593 // i16 value with set bits only in one byte X0 or 0X.
594 return Value == 0 || isNEONBytesplat(Value, 2);
595 }
596
597 // Encode a NEON 16-bit splat immediate for instructions like VBIC/VORR.
598 inline unsigned encodeNEONi16splat(unsigned Value) {
599  assert(isNEONi16splat(Value) && "Invalid NEON splat value");
600 if (Value >= 0x100)
601 Value = (Value >> 8) | 0xa00;
602 else
603 Value |= 0x800;
604 return Value;
605 }
606
607 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
608 inline bool isNEONi32splat(unsigned Value) {
609 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
610 return Value == 0 || isNEONBytesplat(Value, 4);
611 }
612
613 /// Encode a NEON 32-bit splat immediate for instructions like VBIC/VORR.
614 inline unsigned encodeNEONi32splat(unsigned Value) {
615  assert(isNEONi32splat(Value) && "Invalid NEON splat value");
616 if (Value >= 0x100 && Value <= 0xff00)
617 Value = (Value >> 8) | 0x200;
618 else if (Value > 0xffff && Value <= 0xff0000)
619 Value = (Value >> 16) | 0x400;
620 else if (Value > 0xffffff)
621 Value = (Value >> 24) | 0x600;
622 return Value;
623 }
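For instance, a 32-bit value with only byte 2 set passes the splat check and encodes as follows (a small sketch using the helpers above):

  isNEONi32splat(0x00ab0000);     // true: exactly one byte is non-zero
  encodeNEONi32splat(0x00ab0000); // (0xab0000 >> 16) | 0x400 == 0x4ab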
624
625 //===--------------------------------------------------------------------===//
626 // Floating-point Immediates
627 //
628 inline float getFPImmFloat(unsigned Imm) {
629 // We expect an 8-bit binary encoding of a floating-point number here.
630
631 uint8_t Sign = (Imm >> 7) & 0x1;
632 uint8_t Exp = (Imm >> 4) & 0x7;
633 uint8_t Mantissa = Imm & 0xf;
634
635 // 8-bit FP IEEE Float Encoding
636 // abcd efgh aBbbbbbc defgh000 00000000 00000000
637 //
638 // where B = NOT(b);
639 uint32_t I = 0;
640 I |= Sign << 31;
641 I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
642 I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
643 I |= (Exp & 0x3) << 23;
644 I |= Mantissa << 19;
645 return bit_cast<float>(I);
646 }
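Two sample decodings, worked from the bit layout above:

  getFPImmFloat(0x70); // Sign=0, Exp=7, Mantissa=0 -> 0x3F800000 -> 1.0f
  getFPImmFloat(0x00); // Sign=0, Exp=0, Mantissa=0 -> 0x40000000 -> 2.0f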
647
648 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
649 /// floating-point value. If the value cannot be represented as an 8-bit
650 /// floating-point value, then return -1.
651 inline int getFP16Imm(const APInt &Imm) {
652 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
653 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
654 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
655
656 // We can handle 4 bits of mantissa.
657 // mantissa = (16+UInt(e:f:g:h))/16.
658 if (Mantissa & 0x3f)
659 return -1;
660 Mantissa >>= 6;
661
662 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
663 if (Exp < -3 || Exp > 4)
664 return -1;
665 Exp = ((Exp+3) & 0x7) ^ 4;
666
667 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
668 }
669
670 inline int getFP16Imm(const APFloat &FPImm) {
671 return getFP16Imm(FPImm.bitcastToAPInt());
672 }
673
674 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
675 /// floating-point value. If the value cannot be represented as an 8-bit
676 /// floating-point value, then return -1.
677 inline int getFP32Imm(const APInt &Imm) {
678 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
679 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
680 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
681
682 // We can handle 4 bits of mantissa.
683 // mantissa = (16+UInt(e:f:g:h))/16.
684 if (Mantissa & 0x7ffff)
685 return -1;
686 Mantissa >>= 19;
687 if ((Mantissa & 0xf) != Mantissa)
688 return -1;
689
690 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
691 if (Exp < -3 || Exp > 4)
692 return -1;
693 Exp = ((Exp+3) & 0x7) ^ 4;
694
695 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
696 }
697
698 inline int getFP32Imm(const APFloat &FPImm) {
699 return getFP32Imm(FPImm.bitcastToAPInt());
700 }
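A sketch of the inverse direction (assuming llvm::APFloat's float constructor), which round-trips with getFPImmFloat above:

  int Enc = getFP32Imm(APFloat(1.0f)); // 1.0f == 0x3F800000 -> Enc == 0x70
  // getFPImmFloat(0x70) == 1.0f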
701
702 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
703 /// floating-point value. If the value cannot be represented as an 8-bit
704 /// floating-point value, then return -1.
705 inline int getFP64Imm(const APInt &Imm) {
706 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
707 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
708 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
709
710 // We can handle 4 bits of mantissa.
711 // mantissa = (16+UInt(e:f:g:h))/16.
712 if (Mantissa & 0xffffffffffffULL)
713 return -1;
714 Mantissa >>= 48;
715 if ((Mantissa & 0xf) != Mantissa)
716 return -1;
717
718 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
719 if (Exp < -3 || Exp > 4)
720 return -1;
721 Exp = ((Exp+3) & 0x7) ^ 4;
722
723 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
724 }
725
726 inline int getFP64Imm(const APFloat &FPImm) {
727 return getFP64Imm(FPImm.bitcastToAPInt());
728 }
729
730} // end namespace ARM_AM
731} // end namespace llvm
732
733#endif
734

/build/llvm-toolchain-snapshot-9~svn358520/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include "llvm/Support/SwapByteOrder.h"
18#include <algorithm>
19#include <cassert>
20#include <climits>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42/// The behavior an operation has on an input of 0.
43enum ZeroBehavior {
44 /// The returned value is undefined.
45 ZB_Undefined,
46 /// The returned value is numeric_limits<T>::max()
47 ZB_Max,
48 /// The returned value is numeric_limits<T>::digits
49 ZB_Width
50};
51
52namespace detail {
53template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
54 static std::size_t count(T Val, ZeroBehavior) {
55 if (!Val)
56 return std::numeric_limits<T>::digits;
57 if (Val & 0x1)
58 return 0;
59
60 // Bisection method.
61 std::size_t ZeroBits = 0;
62 T Shift = std::numeric_limits<T>::digits >> 1;
63 T Mask = std::numeric_limits<T>::max() >> Shift;
64 while (Shift) {
65 if ((Val & Mask) == 0) {
66 Val >>= Shift;
67 ZeroBits |= Shift;
68 }
69 Shift >>= 1;
70 Mask >>= Shift;
71 }
72 return ZeroBits;
73 }
74};
75
76#if __GNUC__ >= 4 || defined(_MSC_VER)
77template <typename T> struct TrailingZerosCounter<T, 4> {
78 static std::size_t count(T Val, ZeroBehavior ZB) {
79 if (ZB != ZB_Undefined && Val == 0)
24. Assuming 'Val' is equal to 0
25. Taking true branch
80 return 32;
26. Returning the value 32
81
82#if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0)
83 return __builtin_ctz(Val);
84#elif defined(_MSC_VER)
85 unsigned long Index;
86 _BitScanForward(&Index, Val);
87 return Index;
88#endif
89 }
90};
91
92#if !defined(_MSC_VER) || defined(_M_X64)
93template <typename T> struct TrailingZerosCounter<T, 8> {
94 static std::size_t count(T Val, ZeroBehavior ZB) {
95 if (ZB != ZB_Undefined && Val == 0)
96 return 64;
97
98#if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0)
99 return __builtin_ctzll(Val);
100#elif defined(_MSC_VER)
101 unsigned long Index;
102 _BitScanForward64(&Index, Val);
103 return Index;
104#endif
105 }
106};
107#endif
108#endif
109} // namespace detail
110
111/// Count number of 0's from the least significant bit to the most
112/// stopping at the first 1.
113///
114/// Only unsigned integral types are allowed.
115///
116/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
117/// valid arguments.
118template <typename T>
119std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
120 static_assert(std::numeric_limits<T>::is_integer &&
121 !std::numeric_limits<T>::is_signed,
122 "Only unsigned integral types are allowed.");
123 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
23. Calling 'TrailingZerosCounter::count'
27. Returning from 'TrailingZerosCounter::count'
28. Returning the value 32
124}
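For example:

  countTrailingZeros(0x8u);             // 3
  countTrailingZeros(0u);               // 32 with the default ZB_Width
  countTrailingZeros(0u, ZB_Undefined); // result is undefined; callers must rule out 0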
125
126namespace detail {
127template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
128 static std::size_t count(T Val, ZeroBehavior) {
129 if (!Val)
130 return std::numeric_limits<T>::digits;
131
132 // Bisection method.
133 std::size_t ZeroBits = 0;
134 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
135 T Tmp = Val >> Shift;
136 if (Tmp)
137 Val = Tmp;
138 else
139 ZeroBits |= Shift;
140 }
141 return ZeroBits;
142 }
143};
144
145#if __GNUC__ >= 4 || defined(_MSC_VER)
146template <typename T> struct LeadingZerosCounter<T, 4> {
147 static std::size_t count(T Val, ZeroBehavior ZB) {
148 if (ZB != ZB_Undefined && Val == 0)
149 return 32;
150
151#if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0)
152 return __builtin_clz(Val);
153#elif defined(_MSC_VER)
154 unsigned long Index;
155 _BitScanReverse(&Index, Val);
156 return Index ^ 31;
157#endif
158 }
159};
160
161#if !defined(_MSC_VER) || defined(_M_X64)
162template <typename T> struct LeadingZerosCounter<T, 8> {
163 static std::size_t count(T Val, ZeroBehavior ZB) {
164 if (ZB != ZB_Undefined && Val == 0)
165 return 64;
166
167#if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0)
168 return __builtin_clzll(Val);
169#elif defined(_MSC_VER)
170 unsigned long Index;
171 _BitScanReverse64(&Index, Val);
172 return Index ^ 63;
173#endif
174 }
175};
176#endif
177#endif
178} // namespace detail
179
180/// Count number of 0's from the most significant bit to the least
181/// stopping at the first 1.
182///
183/// Only unsigned integral types are allowed.
184///
185/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
186/// valid arguments.
187template <typename T>
188std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
189 static_assert(std::numeric_limits<T>::is_integer &&
190 !std::numeric_limits<T>::is_signed,
191 "Only unsigned integral types are allowed.");
192 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
193}
194
195/// Get the index of the first set bit starting from the least
196/// significant bit.
197///
198/// Only unsigned integral types are allowed.
199///
200/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
201/// valid arguments.
202template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
203 if (ZB == ZB_Max && Val == 0)
204 return std::numeric_limits<T>::max();
205
206 return countTrailingZeros(Val, ZB_Undefined);
207}
208
209/// Create a bitmask with the N right-most bits set to 1, and all other
210/// bits set to 0. Only unsigned types are allowed.
211template <typename T> T maskTrailingOnes(unsigned N) {
212 static_assert(std::is_unsigned<T>::value, "Invalid type!");
213  const unsigned Bits = CHAR_BIT * sizeof(T);
214  assert(N <= Bits && "Invalid bit index");
215 return N == 0 ? 0 : (T(-1) >> (Bits - N));
216}
217
218/// Create a bitmask with the N left-most bits set to 1, and all other
219/// bits set to 0. Only unsigned types are allowed.
220template <typename T> T maskLeadingOnes(unsigned N) {
221  return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
222}
223
224/// Create a bitmask with the N right-most bits set to 0, and all other
225/// bits set to 1. Only unsigned types are allowed.
226template <typename T> T maskTrailingZeros(unsigned N) {
227  return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
228}
229
230/// Create a bitmask with the N left-most bits set to 0, and all other
231/// bits set to 1. Only unsigned types are allowed.
232template <typename T> T maskLeadingZeros(unsigned N) {
233  return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
234}
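For a 32-bit type the four mask helpers give, for example:

  maskTrailingOnes<uint32_t>(8);  // 0x000000FF
  maskLeadingOnes<uint32_t>(8);   // 0xFF000000
  maskTrailingZeros<uint32_t>(8); // 0xFFFFFF00
  maskLeadingZeros<uint32_t>(8);  // 0x00FFFFFF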
235
236/// Get the index of the last set bit starting from the least
237/// significant bit.
238///
239/// Only unsigned integral types are allowed.
240///
241/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
242/// valid arguments.
243template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
244 if (ZB == ZB_Max && Val == 0)
245 return std::numeric_limits<T>::max();
246
247 // Use ^ instead of - because both gcc and llvm can remove the associated ^
248 // in the __builtin_clz intrinsic on x86.
249 return countLeadingZeros(Val, ZB_Undefined) ^
250 (std::numeric_limits<T>::digits - 1);
251}
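For example:

  findFirstSet(0x12u); // 1 (lowest set bit is bit 1)
  findLastSet(0x12u);  // 4 (highest set bit is bit 4)
  findLastSet(0u);     // std::numeric_limits<unsigned>::max() with the default ZB_Max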
252
253/// Macro compressed bit reversal table for 256 bits.
254///
255/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
256static const unsigned char BitReverseTable256[256] = {
257#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
258#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
259#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
260 R6(0), R6(2), R6(1), R6(3)
261#undef R2
262#undef R4
263#undef R6
264};
265
266/// Reverse the bits in \p Val.
267template <typename T>
268T reverseBits(T Val) {
269 unsigned char in[sizeof(Val)];
270 unsigned char out[sizeof(Val)];
271 std::memcpy(in, &Val, sizeof(Val));
272 for (unsigned i = 0; i < sizeof(Val); ++i)
273 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
274 std::memcpy(&Val, out, sizeof(Val));
275 return Val;
276}
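For example:

  reverseBits<uint8_t>(0x01); // 0x80
  reverseBits<uint32_t>(1u);  // 0x80000000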
277
278// NOTE: The following support functions use the _32/_64 extensions instead of
279// type overloading so that signed and unsigned integers can be used without
280// ambiguity.
281
282/// Return the high 32 bits of a 64 bit value.
283constexpr inline uint32_t Hi_32(uint64_t Value) {
284 return static_cast<uint32_t>(Value >> 32);
285}
286
287/// Return the low 32 bits of a 64 bit value.
288constexpr inline uint32_t Lo_32(uint64_t Value) {
289 return static_cast<uint32_t>(Value);
290}
291
292/// Make a 64-bit integer from a high / low pair of 32-bit integers.
293constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
294 return ((uint64_t)High << 32) | (uint64_t)Low;
295}
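For example:

  Make_64(0x12345678u, 0x9ABCDEF0u); // 0x123456789ABCDEF0
  Hi_32(0x123456789ABCDEF0ull);      // 0x12345678
  Lo_32(0x123456789ABCDEF0ull);      // 0x9ABCDEF0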
296
297/// Checks if an integer fits into the given bit width.
298template <unsigned N> constexpr inline bool isInt(int64_t x) {
299  return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
300}
301// Template specializations to get better code for common cases.
302template <> constexpr inline bool isInt<8>(int64_t x) {
303 return static_cast<int8_t>(x) == x;
304}
305template <> constexpr inline bool isInt<16>(int64_t x) {
306 return static_cast<int16_t>(x) == x;
307}
308template <> constexpr inline bool isInt<32>(int64_t x) {
309 return static_cast<int32_t>(x) == x;
310}
311
312/// Checks if a signed integer is an N-bit number shifted left by S.
313template <unsigned N, unsigned S>
314constexpr inline bool isShiftedInt(int64_t x) {
315 static_assert(
316 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
317 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
318  return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
319}
320
321/// Checks if an unsigned integer fits into the given bit width.
322///
323/// This is written as two functions rather than as simply
324///
325/// return N >= 64 || X < (UINT64_C(1) << N);
326///
327/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
328/// left too many places.
329template <unsigned N>
330constexpr inline typename std::enable_if<(N < 64), bool>::type
331isUInt(uint64_t X) {
332 static_assert(N > 0, "isUInt<0> doesn't make sense");
333  return X < (UINT64_C(1) << (N));
334}
335template <unsigned N>
336constexpr inline typename std::enable_if<N >= 64, bool>::type
337isUInt(uint64_t X) {
338 return true;
339}
340
341// Template specializations to get better code for common cases.
342template <> constexpr inline bool isUInt<8>(uint64_t x) {
343 return static_cast<uint8_t>(x) == x;
344}
345template <> constexpr inline bool isUInt<16>(uint64_t x) {
346 return static_cast<uint16_t>(x) == x;
347}
348template <> constexpr inline bool isUInt<32>(uint64_t x) {
349 return static_cast<uint32_t>(x) == x;
350}
351
352/// Checks if an unsigned integer is an N-bit number shifted left by S.
353template <unsigned N, unsigned S>
354constexpr inline bool isShiftedUInt(uint64_t x) {
355 static_assert(
356 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
357 static_assert(N + S <= 64,
358 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
359 // Per the two static_asserts above, S must be strictly less than 64. So
360 // 1 << S is not undefined behavior.
361  return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
362}
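A few sample checks:

  isInt<8>(127);               // true
  isInt<8>(128);               // false
  isShiftedInt<8, 2>(508);     // true: 508 == 127 << 2
  isUInt<16>(65535);           // true
  isShiftedUInt<8, 8>(0xAB00); // true: an 8-bit value shifted left by 8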
363
364/// Gets the maximum value for a N-bit unsigned integer.
365inline uint64_t maxUIntN(uint64_t N) {
366  assert(N > 0 && N <= 64 && "integer width out of range");
367
368 // uint64_t(1) << 64 is undefined behavior, so we can't do
369 // (uint64_t(1) << N) - 1
370 // without checking first that N != 64. But this works and doesn't have a
371 // branch.
372  return UINT64_MAX >> (64 - N);
373}
374
375/// Gets the minimum value for a N-bit signed integer.
376inline int64_t minIntN(int64_t N) {
377  assert(N > 0 && N <= 64 && "integer width out of range");
378
379  return -(UINT64_C(1)<<(N-1));
380}
381
382/// Gets the maximum value for a N-bit signed integer.
383inline int64_t maxIntN(int64_t N) {
384  assert(N > 0 && N <= 64 && "integer width out of range");
385
386 // This relies on two's complement wraparound when N == 64, so we convert to
387 // int64_t only at the very end to avoid UB.
388  return (UINT64_C(1) << (N - 1)) - 1;
389}
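For example:

  maxUIntN(8); // 255
  minIntN(8);  // -128
  maxIntN(8);  // 127
  maxIntN(64); // INT64_MAX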
390
391/// Checks if an unsigned integer fits into the given (dynamic) bit width.
392inline bool isUIntN(unsigned N, uint64_t x) {
393 return N >= 64 || x <= maxUIntN(N);
394}
395
396/// Checks if a signed integer fits into the given (dynamic) bit width.
397inline bool isIntN(unsigned N, int64_t x) {
398 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
399}
400
401/// Return true if the argument is a non-empty sequence of ones starting at the
402/// least significant bit with the remainder zero (32 bit version).
403/// Ex. isMask_32(0x0000FFFFU) == true.
404constexpr inline bool isMask_32(uint32_t Value) {
405 return Value && ((Value + 1) & Value) == 0;
406}
407
408/// Return true if the argument is a non-empty sequence of ones starting at the
409/// least significant bit with the remainder zero (64 bit version).
410constexpr inline bool isMask_64(uint64_t Value) {
411 return Value && ((Value + 1) & Value) == 0;
412}
413
414/// Return true if the argument contains a non-empty sequence of ones with the
415/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
416constexpr inline bool isShiftedMask_32(uint32_t Value) {
417 return Value && isMask_32((Value - 1) | Value);
418}
419
420/// Return true if the argument contains a non-empty sequence of ones with the
421/// remainder zero (64 bit version.)
422constexpr inline bool isShiftedMask_64(uint64_t Value) {
423 return Value && isMask_64((Value - 1) | Value);
424}
425
426/// Return true if the argument is a power of two > 0.
427/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
428constexpr inline bool isPowerOf2_32(uint32_t Value) {
429 return Value && !(Value & (Value - 1));
430}
431
432/// Return true if the argument is a power of two > 0 (64 bit edition.)
433constexpr inline bool isPowerOf2_64(uint64_t Value) {
434 return Value && !(Value & (Value - 1));
435}
436
437/// Return a byte-swapped representation of the 16-bit argument.
438inline uint16_t ByteSwap_16(uint16_t Value) {
439 return sys::SwapByteOrder_16(Value);
440}
441
442/// Return a byte-swapped representation of the 32-bit argument.
443inline uint32_t ByteSwap_32(uint32_t Value) {
444 return sys::SwapByteOrder_32(Value);
445}
446
447/// Return a byte-swapped representation of the 64-bit argument.
448inline uint64_t ByteSwap_64(uint64_t Value) {
449 return sys::SwapByteOrder_64(Value);
450}
451
452/// Count the number of ones from the most significant bit to the first
453/// zero bit.
454///
455/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
456/// Only unsigned integral types are allowed.
457///
458/// \param ZB the behavior on an input of all ones. Only ZB_Width and
459/// ZB_Undefined are valid arguments.
460template <typename T>
461std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
462 static_assert(std::numeric_limits<T>::is_integer &&
463 !std::numeric_limits<T>::is_signed,
464 "Only unsigned integral types are allowed.");
465 return countLeadingZeros<T>(~Value, ZB);
466}
467
468/// Count the number of ones from the least significant bit to the first
469/// zero bit.
470///
471/// Ex. countTrailingOnes(0x00FF00FF) == 8.
472/// Only unsigned integral types are allowed.
473///
474/// \param ZB the behavior on an input of all ones. Only ZB_Width and
475/// ZB_Undefined are valid arguments.
476template <typename T>
477std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
478 static_assert(std::numeric_limits<T>::is_integer &&
479 !std::numeric_limits<T>::is_signed,
480 "Only unsigned integral types are allowed.");
481 return countTrailingZeros<T>(~Value, ZB);
482}
483
484namespace detail {
485template <typename T, std::size_t SizeOfT> struct PopulationCounter {
486 static unsigned count(T Value) {
487 // Generic version, forward to 32 bits.
488 static_assert(SizeOfT <= 4, "Not implemented!");
489#if __GNUC__ >= 4
490 return __builtin_popcount(Value);
491#else
492 uint32_t v = Value;
493 v = v - ((v >> 1) & 0x55555555);
494 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
495 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
496#endif
497 }
498};
499
500template <typename T> struct PopulationCounter<T, 8> {
501 static unsigned count(T Value) {
502#if __GNUC__ >= 4
503 return __builtin_popcountll(Value);
504#else
505 uint64_t v = Value;
506 v = v - ((v >> 1) & 0x5555555555555555ULL);
507 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
508 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
509 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
510#endif
511 }
512};
513} // namespace detail
514
515/// Count the number of set bits in a value.
516/// Ex. countPopulation(0xF000F000) = 8
517/// Returns 0 if the word is zero.
518template <typename T>
519inline unsigned countPopulation(T Value) {
520 static_assert(std::numeric_limits<T>::is_integer &&
521 !std::numeric_limits<T>::is_signed,
522 "Only unsigned integral types are allowed.");
523 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
524}
525
526/// Return the log base 2 of the specified value.
527inline double Log2(double Value) {
528#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
529 return __builtin_log(Value) / __builtin_log(2.0);
530#else
531 return log2(Value);
532#endif
533}
534
535/// Return the floor log base 2 of the specified value, -1 if the value is zero.
536/// (32 bit edition.)
537/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
538inline unsigned Log2_32(uint32_t Value) {
539 return 31 - countLeadingZeros(Value);
540}
541
542/// Return the floor log base 2 of the specified value, -1 if the value is zero.
543/// (64 bit edition.)
544inline unsigned Log2_64(uint64_t Value) {
545 return 63 - countLeadingZeros(Value);
546}
547
548/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
549/// (32 bit edition).
550/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
551inline unsigned Log2_32_Ceil(uint32_t Value) {
552 return 32 - countLeadingZeros(Value - 1);
553}
554
555/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
556/// (64 bit edition.)
557inline unsigned Log2_64_Ceil(uint64_t Value) {
558 return 64 - countLeadingZeros(Value - 1);
559}
560
561/// Return the greatest common divisor of the values using Euclid's algorithm.
562inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
563 while (B) {
564 uint64_t T = B;
565 B = A % B;
566 A = T;
567 }
568 return A;
569}
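For example:

  GreatestCommonDivisor64(48, 36); // 12
  GreatestCommonDivisor64(7, 0);   // 7 (the loop exits immediately when B is 0)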
570
571/// This function takes a 64-bit integer and returns the bit equivalent double.
572inline double BitsToDouble(uint64_t Bits) {
573 double D;
574 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
575 memcpy(&D, &Bits, sizeof(Bits));
576 return D;
577}
578
579/// This function takes a 32-bit integer and returns the bit equivalent float.
580inline float BitsToFloat(uint32_t Bits) {
581 float F;
582 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
583 memcpy(&F, &Bits, sizeof(Bits));
584 return F;
585}
586
587/// This function takes a double and returns the bit equivalent 64-bit integer.
588/// Note that copying doubles around changes the bits of NaNs on some hosts,
589/// notably x86, so this routine cannot be used if these bits are needed.
590inline uint64_t DoubleToBits(double Double) {
591 uint64_t Bits;
592 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
593 memcpy(&Bits, &Double, sizeof(Double));
594 return Bits;
595}
596
597/// This function takes a float and returns the bit equivalent 32-bit integer.
598/// Note that copying floats around changes the bits of NaNs on some hosts,
599/// notably x86, so this routine cannot be used if these bits are needed.
600inline uint32_t FloatToBits(float Float) {
601 uint32_t Bits;
602 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
603 memcpy(&Bits, &Float, sizeof(Float));
604 return Bits;
605}
606
607/// A and B are either alignments or offsets. Return the minimum alignment that
608/// may be assumed after adding the two together.
609constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
610 // The largest power of 2 that divides both A and B.
611 //
612 // Replace "-Value" by "1+~Value" in the following commented code to avoid
613 // MSVC warning C4146
614 // return (A | B) & -(A | B);
615 return (A | B) & (1 + ~(A | B));
616}
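For example:

  MinAlign(8, 12); // 4: the largest power of 2 dividing both
  MinAlign(16, 0); // 16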
617
618/// Aligns \c Addr to \c Alignment bytes, rounding up.
619///
620/// Alignment should be a power of two. This method rounds up, so
621/// alignAddr(7, 4) == 8 and alignAddr(8, 4) == 8.
622inline uintptr_t alignAddr(const void *Addr, size_t Alignment) {
623  assert(Alignment && isPowerOf2_64((uint64_t)Alignment) &&
624         "Alignment is not a power of two!");
625
626  assert((uintptr_t)Addr + Alignment - 1 >= (uintptr_t)Addr);
627
628 return (((uintptr_t)Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1));
629}
630
631/// Returns the necessary adjustment for aligning \c Ptr to \c Alignment
632/// bytes, rounding up.
633inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) {
634 return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr;
635}
636
637/// Returns the next power of two (in 64-bits) that is strictly greater than A.
638/// Returns zero on overflow.
639inline uint64_t NextPowerOf2(uint64_t A) {
640 A |= (A >> 1);
641 A |= (A >> 2);
642 A |= (A >> 4);
643 A |= (A >> 8);
644 A |= (A >> 16);
645 A |= (A >> 32);
646 return A + 1;
647}
648
649/// Returns the power of two which is less than or equal to the given value.
650/// Essentially, it is a floor operation across the domain of powers of two.
651inline uint64_t PowerOf2Floor(uint64_t A) {
652 if (!A) return 0;
653 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
654}
655
656/// Returns the power of two which is greater than or equal to the given value.
657/// Essentially, it is a ceil operation across the domain of powers of two.
658inline uint64_t PowerOf2Ceil(uint64_t A) {
659 if (!A)
660 return 0;
661 return NextPowerOf2(A - 1);
662}
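For example:

  NextPowerOf2(5);  // 8 (strictly greater than the input)
  NextPowerOf2(8);  // 16
  PowerOf2Floor(5); // 4
  PowerOf2Ceil(5);  // 8
  PowerOf2Ceil(8);  // 8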
663
664/// Returns the next integer (mod 2**64) that is greater than or equal to
665/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
666///
667/// If non-zero \p Skew is specified, the return value will be a minimal
668/// integer that is greater than or equal to \p Value and equal to
669/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
670/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
671///
672/// Examples:
673/// \code
674/// alignTo(5, 8) = 8
675/// alignTo(17, 8) = 24
676/// alignTo(~0LL, 8) = 0
677/// alignTo(321, 255) = 510
678///
679/// alignTo(5, 8, 7) = 7
680/// alignTo(17, 8, 1) = 17
681/// alignTo(~0LL, 8, 3) = 3
682/// alignTo(321, 255, 42) = 552
683/// \endcode
684inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
685  assert(Align != 0u && "Align can't be 0.");
686 Skew %= Align;
687 return (Value + Align - 1 - Skew) / Align * Align + Skew;
688}
689
690/// Returns the next integer (mod 2**64) that is greater than or equal to
691/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
692template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
693 static_assert(Align != 0u, "Align must be non-zero");
694 return (Value + Align - 1) / Align * Align;
695}
696
697/// Returns the integer ceil(Numerator / Denominator).
698inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
699 return alignTo(Numerator, Denominator) / Denominator;
700}
701
702/// \c alignTo for contexts where a constant expression is required.
703/// \sa alignTo
704///
705/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
706template <uint64_t Align>
707struct AlignTo {
708 static_assert(Align != 0u, "Align must be non-zero");
709 template <uint64_t Value>
710 struct from_value {
711 static const uint64_t value = (Value + Align - 1) / Align * Align;
712 };
713};
714
715/// Returns the largest uint64_t less than or equal to \p Value and is
716/// \p Skew mod \p Align. \p Align must be non-zero
717inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
718  assert(Align != 0u && "Align can't be 0.");
719 Skew %= Align;
720 return (Value - Skew) / Align * Align + Skew;
721}
722
723/// Returns the offset to the next integer (mod 2**64) that is greater than
724/// or equal to \p Value and is a multiple of \p Align. \p Align must be
725/// non-zero.
726inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
727 return alignTo(Value, Align) - Value;
728}
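For example:

  alignDown(17, 8);         // 16
  alignDown(17, 8, 3);      // 11: the largest value <= 17 that is 3 mod 8
  OffsetToAlignment(17, 8); // 7, since alignTo(17, 8) == 24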
729
730/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
731/// Requires 0 < B <= 32.
732template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
733 static_assert(B > 0, "Bit width can't be 0.");
734 static_assert(B <= 32, "Bit width out of range.");
735 return int32_t(X << (32 - B)) >> (32 - B);
736}
737
738/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
739/// Requires 0 < B < 32.
740inline int32_t SignExtend32(uint32_t X, unsigned B) {
741  assert(B > 0 && "Bit width can't be 0.");
742  assert(B <= 32 && "Bit width out of range.");
743 return int32_t(X << (32 - B)) >> (32 - B);
744}
745
746/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
747/// Requires 0 < B < 64.
748template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
749 static_assert(B > 0, "Bit width can't be 0.");
750 static_assert(B <= 64, "Bit width out of range.");
751 return int64_t(x << (64 - B)) >> (64 - B);
752}
753
754/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
755/// Requires 0 < B < 64.
756inline int64_t SignExtend64(uint64_t X, unsigned B) {
757  assert(B > 0 && "Bit width can't be 0.");
758  assert(B <= 64 && "Bit width out of range.");
759 return int64_t(X << (64 - B)) >> (64 - B);
760}
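For example:

  SignExtend32<8>(0xFF);    // -1
  SignExtend32(0x80, 8);    // -128
  SignExtend64<16>(0x7FFF); // 32767 (sign bit clear, value unchanged)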
761
762/// Subtract two unsigned integers, X and Y, of type T and return the absolute
763/// value of the result.
764template <typename T>
765typename std::enable_if<std::is_unsigned<T>::value, T>::type
766AbsoluteDifference(T X, T Y) {
767 return std::max(X, Y) - std::min(X, Y);
768}
769
770/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
771/// maximum representable value of T on overflow. ResultOverflowed indicates if
772/// the result is larger than the maximum representable value of type T.
773template <typename T>
774typename std::enable_if<std::is_unsigned<T>::value, T>::type
775SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
776 bool Dummy;
777 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
778 // Hacker's Delight, p. 29
779 T Z = X + Y;
780 Overflowed = (Z < X || Z < Y);
781 if (Overflowed)
782 return std::numeric_limits<T>::max();
783 else
784 return Z;
785}
786
787/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
788/// maximum representable value of T on overflow. ResultOverflowed indicates if
789/// the result is larger than the maximum representable value of type T.
790template <typename T>
791typename std::enable_if<std::is_unsigned<T>::value, T>::type
792SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
793 bool Dummy;
794 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
795
796 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
797 // because it fails for uint16_t (where multiplication can have undefined
798 // behavior due to promotion to int), and requires a division in addition
799 // to the multiplication.
800
801 Overflowed = false;
802
803 // Log2(Z) would be either Log2Z or Log2Z + 1.
804 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
805 // will necessarily be less than Log2Max as desired.
806 int Log2Z = Log2_64(X) + Log2_64(Y);
807 const T Max = std::numeric_limits<T>::max();
808 int Log2Max = Log2_64(Max);
809 if (Log2Z < Log2Max) {
810 return X * Y;
811 }
812 if (Log2Z > Log2Max) {
813 Overflowed = true;
814 return Max;
815 }
816
817 // We're going to use the top bit, and maybe overflow one
818 // bit past it. Multiply all but the bottom bit then add
819 // that on at the end.
820 T Z = (X >> 1) * Y;
821 if (Z & ~(Max >> 1)) {
822 Overflowed = true;
823 return Max;
824 }
825 Z <<= 1;
826 if (X & 1)
827 return SaturatingAdd(Z, Y, ResultOverflowed);
828
829 return Z;
830}
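A couple of sample uses with an 8-bit element type, where saturation is easy to see:

  bool Ov = false;
  SaturatingAdd<uint8_t>(200, 100, &Ov);    // 255, Ov == true
  SaturatingMultiply<uint8_t>(10, 10, &Ov); // 100, Ov == false
  SaturatingMultiply<uint8_t>(20, 20, &Ov); // 255, Ov == true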
831
832/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
833/// the product. Clamp the result to the maximum representable value of T on
834/// overflow. ResultOverflowed indicates if the result is larger than the
835/// maximum representable value of type T.
836template <typename T>
837typename std::enable_if<std::is_unsigned<T>::value, T>::type
838SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
839 bool Dummy;
840 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
841
842 T Product = SaturatingMultiply(X, Y, &Overflowed);
843 if (Overflowed)
844 return Product;
845
846 return SaturatingAdd(A, Product, &Overflowed);
847}
848
849/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
850extern const float huge_valf;
851} // End llvm namespace
852
853#endif