Bug Summary

File: lib/Target/ARM/ARMTargetTransformInfo.cpp
Warning: line 221, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
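
The diagnostic refers to the C++ rule that shifting a value by an amount greater than or equal to its bit width is undefined behaviour. As a minimal, self-contained sketch of this problem class (hypothetical values, not taken from the analyzed build), a 32-bit 'unsigned int' may only be shifted by amounts 0..31:

    #include <cstdio>

    // Returns v << amt, but only when the shift amount is defined for a
    // 32-bit unsigned value; otherwise falls back to 0.
    unsigned shiftedOrZero(unsigned v, unsigned amt) {
      return amt < 32 ? (v << amt) : 0u;   // guard avoids the undefined shift
    }

    int main() {
      std::printf("%u\n", shiftedOrZero(~255U, 8));   // defined: amount < width
      std::printf("%u\n", shiftedOrZero(~255U, 32));  // would be UB without the guard
    }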

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/ARM -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn329677/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/ARM -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-04-11-031539-24776-1 -x c++ /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/ARM/ARMTargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARMTargetTransformInfo.h"
11#include "ARMSubtarget.h"
12#include "MCTargetDesc/ARMAddressingModes.h"
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/Analysis/LoopInfo.h"
16#include "llvm/CodeGen/CostTable.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/ValueTypes.h"
19#include "llvm/IR/BasicBlock.h"
20#include "llvm/IR/CallSite.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DerivedTypes.h"
23#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Instructions.h"
25#include "llvm/IR/Type.h"
26#include "llvm/MC/SubtargetFeature.h"
27#include "llvm/Support/Casting.h"
28#include "llvm/Support/MachineValueType.h"
29#include "llvm/Target/TargetMachine.h"
30#include <algorithm>
31#include <cassert>
32#include <cstdint>
33#include <utility>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "armtti"
38
39bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
40 const Function *Callee) const {
41 const TargetMachine &TM = getTLI()->getTargetMachine();
42 const FeatureBitset &CallerBits =
43 TM.getSubtargetImpl(*Caller)->getFeatureBits();
44 const FeatureBitset &CalleeBits =
45 TM.getSubtargetImpl(*Callee)->getFeatureBits();
46
47 // To inline a callee, all features not in the whitelist must match exactly.
48 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
49 (CalleeBits & ~InlineFeatureWhitelist);
50 // For features in the whitelist, the callee's features must be a subset of
51 // the callers'.
52 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
53 (CalleeBits & InlineFeatureWhitelist);
54 return MatchExact && MatchSubset;
55}
56
57int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
58 assert(Ty->isIntegerTy());
59
60 unsigned Bits = Ty->getPrimitiveSizeInBits();
61 if (Bits == 0 || Imm.getActiveBits() >= 64)
7) Assuming 'Bits' is not equal to 0
8) Taking false branch
62 return 4;
63
64 int64_t SImmVal = Imm.getSExtValue();
65 uint64_t ZImmVal = Imm.getZExtValue();
66 if (!ST->isThumb()) {
9) Assuming the condition is false
10) Taking false branch
67 if ((SImmVal >= 0 && SImmVal < 65536) ||
68 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
69 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
70 return 1;
71 return ST->hasV6T2Ops() ? 2 : 3;
72 }
73 if (ST->isThumb2()) {
11) Assuming the condition is false
12) Taking false branch
74 if ((SImmVal >= 0 && SImmVal < 65536) ||
75 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
76 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
77 return 1;
78 return ST->hasV6T2Ops() ? 2 : 3;
79 }
80 // Thumb1.
81 if (SImmVal >= 0 && SImmVal < 256)
13) Assuming 'SImmVal' is < 0
82 return 1;
83 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
14) Calling 'isThumbImmShiftedVal'
84 return 2;
85 // Load from constantpool.
86 return 3;
87}
88
89// Constants smaller than 256 fit in the immediate field of
90// Thumb1 instructions so we return a zero cost and 1 otherwise.
91int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
92 const APInt &Imm, Type *Ty) {
93 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
94 return 0;
95
96 return 1;
97}
98
99int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
100 Type *Ty) {
101 // Division by a constant can be turned into multiplication, but only if we
102 // know it's constant. So it's not so much that the immediate is cheap (it's
103 // not), but that the alternative is worse.
104 // FIXME: this is probably unneeded with GlobalISel.
105 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1) Assuming 'Opcode' is equal to SDiv
3) Taking false branch
106 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
107 Idx == 1)
2) Assuming 'Idx' is not equal to 1
108 return 0;
109
110 if (Opcode == Instruction::And)
4) Taking false branch
111 // Conversion to BIC is free, and means we can use ~Imm instead.
112 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
113
114 if (Opcode == Instruction::Add)
5) Taking false branch
115 // Conversion to SUB is free, and means we can use -Imm instead.
116 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
117
118 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
119 Ty->getIntegerBitWidth() == 32) {
120 int64_t NegImm = -Imm.getSExtValue();
121 if (ST->isThumb2() && NegImm < 1<<12)
122 // icmp X, #-C -> cmn X, #C
123 return 0;
124 if (ST->isThumb() && NegImm < 1<<8)
125 // icmp X, #-C -> adds X, #C
126 return 0;
127 }
128
129 // xor a, -1 can always be folded to MVN
130 if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
131 return 0;
132
133 return getIntImmCost(Imm, Ty);
6) Calling 'ARMTTIImpl::getIntImmCost'
134}
135
136int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
137 const Instruction *I) {
138 int ISD = TLI->InstructionOpcodeToISD(Opcode);
139 assert(ISD && "Invalid opcode");
140
141 // Single to/from double precision conversions.
142 static const CostTblEntry NEONFltDblTbl[] = {
143 // Vector fptrunc/fpext conversions.
144 { ISD::FP_ROUND, MVT::v2f64, 2 },
145 { ISD::FP_EXTEND, MVT::v2f32, 2 },
146 { ISD::FP_EXTEND, MVT::v4f32, 4 }
147 };
148
149 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
150 ISD == ISD::FP_EXTEND)) {
151 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
152 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
153 return LT.first * Entry->Cost;
154 }
155
156 EVT SrcTy = TLI->getValueType(DL, Src);
157 EVT DstTy = TLI->getValueType(DL, Dst);
158
159 if (!SrcTy.isSimple() || !DstTy.isSimple())
160 return BaseT::getCastInstrCost(Opcode, Dst, Src);
161
162 // Some arithmetic, load and store operations have specific instructions
163 // to cast up/down their types automatically at no extra cost.
164 // TODO: Get these tables to know at least what the related operations are.
165 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
166 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
167 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
168 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
169 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
170 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
171 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
172
173 // The number of vmovl instructions for the extension.
174 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
175 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
176 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
177 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
178 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
179 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
180 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
181 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
182 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
183 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
184
185 // Operations that we legalize using splitting.
186 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
187 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
188
189 // Vector float <-> i32 conversions.
190 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
191 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
192
193 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
194 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
195 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
196 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
197 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
198 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
199 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
200 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
201 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
202 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
203 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
204 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
205 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
206 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
207 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
208 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
209 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
210 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
211 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
212 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
213
214 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
215 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
216 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
217 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
218 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
219 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
220
221 // Vector double <-> i32 conversions.
222 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
223 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
224
225 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
226 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
227 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
228 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
229 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
230 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
231
232 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
233 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
234 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
235 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
236 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
237 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
238 };
239
240 if (SrcTy.isVector() && ST->hasNEON()) {
241 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
242 DstTy.getSimpleVT(),
243 SrcTy.getSimpleVT()))
244 return Entry->Cost;
245 }
246
247 // Scalar float to integer conversions.
248 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
249 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
250 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
251 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
252 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
253 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
254 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
255 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
256 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
257 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
258 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
259 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
260 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
261 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
262 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
263 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
264 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
265 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
266 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
267 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
268 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
269 };
270 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
271 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
272 DstTy.getSimpleVT(),
273 SrcTy.getSimpleVT()))
274 return Entry->Cost;
275 }
276
277 // Scalar integer to float conversions.
278 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
279 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
280 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
281 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
282 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
283 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
284 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
285 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
286 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
287 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
288 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
289 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
290 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
291 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
292 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
293 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
294 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
295 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
296 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
297 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
298 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
299 };
300
301 if (SrcTy.isInteger() && ST->hasNEON()) {
302 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
303 ISD, DstTy.getSimpleVT(),
304 SrcTy.getSimpleVT()))
305 return Entry->Cost;
306 }
307
308 // Scalar integer conversion costs.
309 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
310 // i16 -> i64 requires two dependent operations.
311 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
312
313 // Truncates on i64 are assumed to be free.
314 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
315 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
316 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
317 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
318 };
319
320 if (SrcTy.isInteger()) {
321 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
322 DstTy.getSimpleVT(),
323 SrcTy.getSimpleVT()))
324 return Entry->Cost;
325 }
326
327 return BaseT::getCastInstrCost(Opcode, Dst, Src);
328}
329
330int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
331 unsigned Index) {
332 // Penalize inserting into an D-subregister. We end up with a three times
333 // lower estimated throughput on swift.
334 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
335 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
336 return 3;
337
338 if ((Opcode == Instruction::InsertElement ||
339 Opcode == Instruction::ExtractElement)) {
340 // Cross-class copies are expensive on many microarchitectures,
341 // so assume they are expensive by default.
342 if (ValTy->getVectorElementType()->isIntegerTy())
343 return 3;
344
345 // Even if it's not a cross class copy, this likely leads to mixing
346 // of NEON and VFP code and should be therefore penalized.
347 if (ValTy->isVectorTy() &&
348 ValTy->getScalarSizeInBits() <= 32)
349 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
350 }
351
352 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
353}
354
355int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
356 const Instruction *I) {
357 int ISD = TLI->InstructionOpcodeToISD(Opcode);
358 // On NEON a vector select gets lowered to vbsl.
359 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
360 // Lowering of some vector selects is currently far from perfect.
361 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
362 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
363 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
364 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
365 };
366
367 EVT SelCondTy = TLI->getValueType(DL, CondTy);
368 EVT SelValTy = TLI->getValueType(DL, ValTy);
369 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
370 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
371 SelCondTy.getSimpleVT(),
372 SelValTy.getSimpleVT()))
373 return Entry->Cost;
374 }
375
376 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
377 return LT.first;
378 }
379
380 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
381}
382
383int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
384 const SCEV *Ptr) {
385 // Address computations in vectorized code with non-consecutive addresses will
386 // likely result in more instructions compared to scalar code where the
387 // computation can more often be merged into the index mode. The resulting
388 // extra micro-ops can significantly decrease throughput.
389 unsigned NumVectorInstToHideOverhead = 10;
390 int MaxMergeDistance = 64;
391
392 if (Ty->isVectorTy() && SE &&
393 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
394 return NumVectorInstToHideOverhead;
395
396 // In many cases the address computation is not merged into the instruction
397 // addressing mode.
398 return 1;
399}
400
401int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
402 Type *SubTp) {
403 // We only handle costs of reverse and alternate shuffles for now.
404 if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
405 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
406
407 if (Kind == TTI::SK_Reverse) {
408 static const CostTblEntry NEONShuffleTbl[] = {
409 // Reverse shuffle cost one instruction if we are shuffling within a
410 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
411 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
412 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
413 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
414 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
415
416 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
417 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
418 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
419 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
420
421 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
422
423 if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
424 LT.second))
425 return LT.first * Entry->Cost;
426
427 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
428 }
429 if (Kind == TTI::SK_Alternate) {
430 static const CostTblEntry NEONAltShuffleTbl[] = {
431 // Alt shuffle cost table for ARM. Cost is the number of instructions
432 // required to create the shuffled vector.
433
434 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
435 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
436 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
437 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
438
439 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
440 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
441 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
442
443 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
444
445 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
446
447 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
448 if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
449 ISD::VECTOR_SHUFFLE, LT.second))
450 return LT.first * Entry->Cost;
451 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
452 }
453 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
454}
455
456int ARMTTIImpl::getArithmeticInstrCost(
457 unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
458 TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
459 TTI::OperandValueProperties Opd2PropInfo,
460 ArrayRef<const Value *> Args) {
461 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
462 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
463
464 const unsigned FunctionCallDivCost = 20;
465 const unsigned ReciprocalDivCost = 10;
466 static const CostTblEntry CostTbl[] = {
467 // Division.
468 // These costs are somewhat random. Choose a cost of 20 to indicate that
469 // vectorizing devision (added function call) is going to be very expensive.
470 // Double registers types.
471 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
472 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
473 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
474 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
475 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
476 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
477 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
478 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
479 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
480 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
481 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
482 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
483 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
484 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
485 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
486 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
487 // Quad register types.
488 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
489 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
490 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
491 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
492 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
493 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
494 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
495 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
496 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
497 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
498 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
499 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
500 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
501 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
502 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
503 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
504 // Multiplication.
505 };
506
507 if (ST->hasNEON())
508 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
509 return LT.first * Entry->Cost;
510
511 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
512 Opd1PropInfo, Opd2PropInfo);
513
514 // This is somewhat of a hack. The problem that we are facing is that SROA
515 // creates a sequence of shift, and, or instructions to construct values.
516 // These sequences are recognized by the ISel and have zero-cost. Not so for
517 // the vectorized code. Because we have support for v2i64 but not i64 those
518 // sequences look particularly beneficial to vectorize.
519 // To work around this we increase the cost of v2i64 operations to make them
520 // seem less beneficial.
521 if (LT.second == MVT::v2i64 &&
522 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
523 Cost += 4;
524
525 return Cost;
526}
527
528int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
529 unsigned AddressSpace, const Instruction *I) {
530 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
531
532 if (Src->isVectorTy() && Alignment != 16 &&
533 Src->getVectorElementType()->isDoubleTy()) {
534 // Unaligned loads/stores are extremely inefficient.
535 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
536 return LT.first * 4;
537 }
538 return LT.first;
539}
540
541int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
542 unsigned Factor,
543 ArrayRef<unsigned> Indices,
544 unsigned Alignment,
545 unsigned AddressSpace) {
546 assert(Factor >= 2 && "Invalid interleave factor");
547 assert(isa<VectorType>(VecTy) && "Expect a vector type");
548
549 // vldN/vstN doesn't support vector types of i64/f64 element.
550 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
551
552 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
553 unsigned NumElts = VecTy->getVectorNumElements();
554 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
555
556 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
557 // Accesses having vector types that are a multiple of 128 bits can be
558 // matched to more than one vldN/vstN instruction.
559 if (NumElts % Factor == 0 &&
560 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
561 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
562 }
563
564 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
565 Alignment, AddressSpace);
566}
567
568void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
569 TTI::UnrollingPreferences &UP) {
570 // Only currently enable these preferences for M-Class cores.
571 if (!ST->isMClass())
572 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
573
574 // Disable loop unrolling for Oz and Os.
575 UP.OptSizeThreshold = 0;
576 UP.PartialOptSizeThreshold = 0;
577 if (L->getHeader()->getParent()->optForSize())
578 return;
579
580 // Only enable on Thumb-2 targets.
581 if (!ST->isThumb2())
582 return;
583
584 SmallVector<BasicBlock*, 4> ExitingBlocks;
585 L->getExitingBlocks(ExitingBlocks);
586 DEBUG(dbgs() << "Loop has:\n"
587 << "Blocks: " << L->getNumBlocks() << "\n"
588 << "Exit blocks: " << ExitingBlocks.size() << "\n");
589
590 // Only allow another exit other than the latch. This acts as an early exit
591 // as it mirrors the profitability calculation of the runtime unroller.
592 if (ExitingBlocks.size() > 2)
593 return;
594
595 // Limit the CFG of the loop body for targets with a branch predictor.
596 // Allowing 4 blocks permits if-then-else diamonds in the body.
597 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
598 return;
599
600 // Scan the loop: don't unroll loops with calls as this could prevent
601 // inlining.
602 unsigned Cost = 0;
603 for (auto *BB : L->getBlocks()) {
604 for (auto &I : *BB) {
605 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
606 ImmutableCallSite CS(&I);
607 if (const Function *F = CS.getCalledFunction()) {
608 if (!isLoweredToCall(F))
609 continue;
610 }
611 return;
612 }
613 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
614 I.value_op_end());
615 Cost += getUserCost(&I, Operands);
616 }
617 }
618
619 DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
620
621 UP.Partial = true;
622 UP.Runtime = true;
623 UP.UnrollRemainder = true;
624 UP.DefaultUnrollRuntimeCount = 4;
625
626 // Force unrolling small loops can be very useful because of the branch
627 // taken cost of the backedge.
628 if (Cost < 12)
629 UP.Force = true;
630}

/build/llvm-toolchain-snapshot-7~svn329677/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the ARM addressing mode implementation stuff.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/APInt.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22
23namespace llvm {
24
25/// ARM_AM - ARM Addressing Mode Stuff
26namespace ARM_AM {
27 enum ShiftOpc {
28 no_shift = 0,
29 asr,
30 lsl,
31 lsr,
32 ror,
33 rrx
34 };
35
36 enum AddrOpc {
37 sub = 0,
38 add
39 };
40
41 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
42
43 inline const char *getShiftOpcStr(ShiftOpc Op) {
44 switch (Op) {
45 default: llvm_unreachable("Unknown shift opc!");
46 case ARM_AM::asr: return "asr";
47 case ARM_AM::lsl: return "lsl";
48 case ARM_AM::lsr: return "lsr";
49 case ARM_AM::ror: return "ror";
50 case ARM_AM::rrx: return "rrx";
51 }
52 }
53
54 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
55 switch (Op) {
56 default: llvm_unreachable("Unknown shift opc!");
57 case ARM_AM::asr: return 2;
58 case ARM_AM::lsl: return 0;
59 case ARM_AM::lsr: return 1;
60 case ARM_AM::ror: return 3;
61 }
62 }
63
64 enum AMSubMode {
65 bad_am_submode = 0,
66 ia,
67 ib,
68 da,
69 db
70 };
71
72 inline const char *getAMSubModeStr(AMSubMode Mode) {
73 switch (Mode) {
74 default: llvm_unreachable("Unknown addressing sub-mode!");
75 case ARM_AM::ia: return "ia";
76 case ARM_AM::ib: return "ib";
77 case ARM_AM::da: return "da";
78 case ARM_AM::db: return "db";
79 }
80 }
81
82 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
83 ///
84 inline unsigned rotr32(unsigned Val, unsigned Amt) {
85 assert(Amt < 32 && "Invalid rotate amount");
86 return (Val >> Amt) | (Val << ((32-Amt)&31));
87 }
88
89 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
90 ///
91 inline unsigned rotl32(unsigned Val, unsigned Amt) {
92 assert(Amt < 32 && "Invalid rotate amount");
93 return (Val << Amt) | (Val >> ((32-Amt)&31));
94 }
95
96 //===--------------------------------------------------------------------===//
97 // Addressing Mode #1: shift_operand with registers
98 //===--------------------------------------------------------------------===//
99 //
100 // This 'addressing mode' is used for arithmetic instructions. It can
101 // represent things like:
102 // reg
103 // reg [asr|lsl|lsr|ror|rrx] reg
104 // reg [asr|lsl|lsr|ror|rrx] imm
105 //
106 // This is stored three operands [rega, regb, opc]. The first is the base
107 // reg, the second is the shift amount (or reg0 if not present or imm). The
108 // third operand encodes the shift opcode and the imm if a reg isn't present.
109 //
110 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
111 return ShOp | (Imm << 3);
112 }
113 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
114 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
115
116 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
117 /// the 8-bit imm value.
118 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
119 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
120 /// the rotate amount.
121 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
122
123 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
124 /// computing the rotate amount to use. If this immediate value cannot be
125 /// handled with a single shifter-op, determine a good rotate amount that will
126 /// take a maximal chunk of bits out of the immediate.
127 inline unsigned getSOImmValRotate(unsigned Imm) {
128 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
129 // of zero.
130 if ((Imm & ~255U) == 0) return 0;
131
132 // Use CTZ to compute the rotate amount.
133 unsigned TZ = countTrailingZeros(Imm);
134
135 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
136 // not 9.
137 unsigned RotAmt = TZ & ~1;
138
139 // If we can handle this spread, return it.
140 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
141 return (32-RotAmt)&31; // HW rotates right, not left.
142
143 // For values like 0xF000000F, we should ignore the low 6 bits, then
144 // retry the hunt.
145 if (Imm & 63U) {
146 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
147 unsigned RotAmt2 = TZ2 & ~1;
148 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
149 return (32-RotAmt2)&31; // HW rotates right, not left.
150 }
151
152 // Otherwise, we have no way to cover this span of bits with a single
153 // shifter_op immediate. Return a chunk of bits that will be useful to
154 // handle.
155 return (32-RotAmt)&31; // HW rotates right, not left.
156 }
157
158 /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
159 /// into an shifter_operand immediate operand, return the 12-bit encoding for
160 /// it. If not, return -1.
161 inline int getSOImmVal(unsigned Arg) {
162 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
163 // of zero.
164 if ((Arg & ~255U) == 0) return Arg;
165
166 unsigned RotAmt = getSOImmValRotate(Arg);
167
168 // If this cannot be handled with a single shifter_op, bail out.
169 if (rotr32(~255U, RotAmt) & Arg)
170 return -1;
171
172 // Encode this correctly.
173 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
174 }
175
176 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
177 /// or'ing together two SOImmVal's.
178 inline bool isSOImmTwoPartVal(unsigned V) {
179 // If this can be handled with a single shifter_op, bail out.
180 V = rotr32(~255U, getSOImmValRotate(V)) & V;
181 if (V == 0)
182 return false;
183
184 // If this can be handled with two shifter_op's, accept.
185 V = rotr32(~255U, getSOImmValRotate(V)) & V;
186 return V == 0;
187 }
188
189 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
190 /// return the first chunk of it.
191 inline unsigned getSOImmTwoPartFirst(unsigned V) {
192 return rotr32(255U, getSOImmValRotate(V)) & V;
193 }
194
195 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
196 /// return the second chunk of it.
197 inline unsigned getSOImmTwoPartSecond(unsigned V) {
198 // Mask out the first hunk.
199 V = rotr32(~255U, getSOImmValRotate(V)) & V;
200
201 // Take what's left.
202 assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
203 return V;
204 }
205
206 /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
207 /// by a left shift. Returns the shift amount to use.
208 inline unsigned getThumbImmValShift(unsigned Imm) {
209 // 8-bit (or less) immediates are trivially immediate operand with a shift
210 // of zero.
211 if ((Imm & ~255U) == 0) return 0;
212
213 // Use CTZ to compute the shift amount.
214 return countTrailingZeros(Imm);
215 }
216
217 /// isThumbImmShiftedVal - Return true if the specified value can be obtained
218 /// by left shifting a 8-bit immediate.
219 inline bool isThumbImmShiftedVal(unsigned V) {
220 // If this can be handled with
221 V = (~255U << getThumbImmValShift(V)) & V;
15) The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
222 return V == 0;
223 }
224
225 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
226 /// by a left shift. Returns the shift amount to use.
227 inline unsigned getThumbImm16ValShift(unsigned Imm) {
228 // 16-bit (or less) immediates are trivially immediate operand with a shift
229 // of zero.
230 if ((Imm & ~65535U) == 0) return 0;
231
232 // Use CTZ to compute the shift amount.
233 return countTrailingZeros(Imm);
234 }
235
236 /// isThumbImm16ShiftedVal - Return true if the specified value can be
237 /// obtained by left shifting a 16-bit immediate.
238 inline bool isThumbImm16ShiftedVal(unsigned V) {
239 // If this can be handled with
240 V = (~65535U << getThumbImm16ValShift(V)) & V;
241 return V == 0;
242 }
243
244 /// getThumbImmNonShiftedVal - If V is a value that satisfies
245 /// isThumbImmShiftedVal, return the non-shiftd value.
246 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
247 return V >> getThumbImmValShift(V);
248 }
249
250
251 /// getT2SOImmValSplat - Return the 12-bit encoded representation
252 /// if the specified value can be obtained by splatting the low 8 bits
253 /// into every other byte or every byte of a 32-bit value. i.e.,
254 /// 00000000 00000000 00000000 abcdefgh control = 0
255 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
256 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
257 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
258 /// Return -1 if none of the above apply.
259 /// See ARM Reference Manual A6.3.2.
260 inline int getT2SOImmValSplatVal(unsigned V) {
261 unsigned u, Vs, Imm;
262 // control = 0
263 if ((V & 0xffffff00) == 0)
264 return V;
265
266 // If the value is zeroes in the first byte, just shift those off
267 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
268 // Any passing value only has 8 bits of payload, splatted across the word
269 Imm = Vs & 0xff;
270 // Likewise, any passing values have the payload splatted into the 3rd byte
271 u = Imm | (Imm << 16);
272
273 // control = 1 or 2
274 if (Vs == u)
275 return (((Vs == V) ? 1 : 2) << 8) | Imm;
276
277 // control = 3
278 if (Vs == (u | (u << 8)))
279 return (3 << 8) | Imm;
280
281 return -1;
282 }
283
284 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
285 /// specified value is a rotated 8-bit value. Return -1 if no rotation
286 /// encoding is possible.
287 /// See ARM Reference Manual A6.3.2.
288 inline int getT2SOImmValRotateVal(unsigned V) {
289 unsigned RotAmt = countLeadingZeros(V);
290 if (RotAmt >= 24)
291 return -1;
292
293 // If 'Arg' can be handled with a single shifter_op return the value.
294 if ((rotr32(0xff000000U, RotAmt) & V) == V)
295 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
296
297 return -1;
298 }
299
300 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
301 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
302 /// encoding for it. If not, return -1.
303 /// See ARM Reference Manual A6.3.2.
304 inline int getT2SOImmVal(unsigned Arg) {
305 // If 'Arg' is an 8-bit splat, then get the encoded value.
306 int Splat = getT2SOImmValSplatVal(Arg);
307 if (Splat != -1)
308 return Splat;
309
310 // If 'Arg' can be handled with a single shifter_op return the value.
311 int Rot = getT2SOImmValRotateVal(Arg);
312 if (Rot != -1)
313 return Rot;
314
315 return -1;
316 }
317
318 inline unsigned getT2SOImmValRotate(unsigned V) {
319 if ((V & ~255U) == 0) return 0;
320 // Use CTZ to compute the rotate amount.
321 unsigned RotAmt = countTrailingZeros(V);
322 return (32 - RotAmt) & 31;
323 }
324
325 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
326 unsigned V = Imm;
327 // Passing values can be any combination of splat values and shifter
328 // values. If this can be handled with a single shifter or splat, bail
329 // out. Those should be handled directly, not with a two-part val.
330 if (getT2SOImmValSplatVal(V) != -1)
331 return false;
332 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
333 if (V == 0)
334 return false;
335
336 // If this can be handled as an immediate, accept.
337 if (getT2SOImmVal(V) != -1) return true;
338
339 // Likewise, try masking out a splat value first.
340 V = Imm;
341 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
342 V &= ~0xff00ff00U;
343 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
344 V &= ~0x00ff00ffU;
345 // If what's left can be handled as an immediate, accept.
346 if (getT2SOImmVal(V) != -1) return true;
347
348 // Otherwise, do not accept.
349 return false;
350 }
351
352 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
353 assert (isT2SOImmTwoPartVal(Imm) &&
354 "Immedate cannot be encoded as two part immediate!");
355 // Try a shifter operand as one part
356 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
357 // If the rest is encodable as an immediate, then return it.
358 if (getT2SOImmVal(V) != -1) return V;
359
360 // Try masking out a splat value first.
361 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
362 return Imm & 0xff00ff00U;
363
364 // The other splat is all that's left as an option.
365 assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
366 return Imm & 0x00ff00ffU;
367 }
368
369 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
370 // Mask out the first hunk
371 Imm ^= getT2SOImmTwoPartFirst(Imm);
372 // Return what's left
373 assert (getT2SOImmVal(Imm) != -1 &&
374 "Unable to encode second part of T2 two part SO immediate");
375 return Imm;
376 }
377
378
379 //===--------------------------------------------------------------------===//
380 // Addressing Mode #2
381 //===--------------------------------------------------------------------===//
382 //
383 // This is used for most simple load/store instructions.
384 //
385 // addrmode2 := reg +/- reg shop imm
386 // addrmode2 := reg +/- imm12
387 //
388 // The first operand is always a Reg. The second operand is a reg if in
389 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
390 // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
391 // fourth operand 16-17 encodes the index mode.
392 //
393 // If this addressing mode is a frame index (before prolog/epilog insertion
394 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
395 // with no shift amount for the frame offset.
396 //
397 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
398 unsigned IdxMode = 0) {
399 assert(Imm12 < (1 << 12) && "Imm too large!");
400 bool isSub = Opc == sub;
401 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
402 }
403 inline unsigned getAM2Offset(unsigned AM2Opc) {
404 return AM2Opc & ((1 << 12)-1);
405 }
406 inline AddrOpc getAM2Op(unsigned AM2Opc) {
407 return ((AM2Opc >> 12) & 1) ? sub : add;
408 }
409 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
410 return (ShiftOpc)((AM2Opc >> 13) & 7);
411 }
412 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
413
414 //===--------------------------------------------------------------------===//
415 // Addressing Mode #3
416 //===--------------------------------------------------------------------===//
417 //
418 // This is used for sign-extending loads, and load/store-pair instructions.
419 //
420 // addrmode3 := reg +/- reg
421 // addrmode3 := reg +/- imm8
422 //
423 // The first operand is always a Reg. The second operand is a reg if in
424 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
425 // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
426 // index mode.
427
428 /// getAM3Opc - This function encodes the addrmode3 opc field.
429 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
430 unsigned IdxMode = 0) {
431 bool isSub = Opc == sub;
432 return ((int)isSub << 8) | Offset | (IdxMode << 9);
433 }
434 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
435 inline AddrOpc getAM3Op(unsigned AM3Opc) {
436 return ((AM3Opc >> 8) & 1) ? sub : add;
437 }
438 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
439
440 //===--------------------------------------------------------------------===//
441 // Addressing Mode #4
442 //===--------------------------------------------------------------------===//
443 //
444 // This is used for load / store multiple instructions.
445 //
446 // addrmode4 := reg, <mode>
447 //
448 // The four modes are:
449 // IA - Increment after
450 // IB - Increment before
451 // DA - Decrement after
452 // DB - Decrement before
453 // For VFP instructions, only the IA and DB modes are valid.
454
455 inline AMSubMode getAM4SubMode(unsigned Mode) {
456 return (AMSubMode)(Mode & 0x7);
457 }
458
459 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
460
461 //===--------------------------------------------------------------------===//
462 // Addressing Mode #5
463 //===--------------------------------------------------------------------===//
464 //
465 // This is used for coprocessor instructions, such as FP load/stores.
466 //
467 // addrmode5 := reg +/- imm8*4
468 //
469 // The first operand is always a Reg. The second operand encodes the
470 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
471
472 /// getAM5Opc - This function encodes the addrmode5 opc field.
473 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
474 bool isSub = Opc == sub;
475 return ((int)isSub << 8) | Offset;
476 }
477 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
478 inline AddrOpc getAM5Op(unsigned AM5Opc) {
479 return ((AM5Opc >> 8) & 1) ? sub : add;
480 }
481
482 //===--------------------------------------------------------------------===//
483 // Addressing Mode #5 FP16
484 //===--------------------------------------------------------------------===//
485 //
486 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
487 //
488 // addrmode5fp16 := reg +/- imm8*2
489 //
490 // The first operand is always a Reg. The second operand encodes the
491 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
492
493 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
494 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
495 bool isSub = Opc == sub;
496 return ((int)isSub << 8) | Offset;
497 }
498 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
499 return AM5Opc & 0xFF;
500 }
501 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
502 return ((AM5Opc >> 8) & 1) ? sub : add;
503 }
504
505 //===--------------------------------------------------------------------===//
506 // Addressing Mode #6
507 //===--------------------------------------------------------------------===//
508 //
509 // This is used for NEON load / store instructions.
510 //
511 // addrmode6 := reg with optional alignment
512 //
513 // This is stored in two operands [regaddr, align]. The first is the
514 // address register. The second operand is the value of the alignment
515 // specifier in bytes or zero if no explicit alignment.
516 // Valid alignments depend on the specific instruction.
517
518 //===--------------------------------------------------------------------===//
519 // NEON Modified Immediates
520 //===--------------------------------------------------------------------===//
521 //
522 // Several NEON instructions (e.g., VMOV) take a "modified immediate"
523 // vector operand, where a small immediate encoded in the instruction
524 // specifies a full NEON vector value. These modified immediates are
525 // represented here as encoded integers. The low 8 bits hold the immediate
526 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
527 // the "Cmode" field of the instruction. The interfaces below treat the
528 // Op and Cmode values as a single 5-bit value.
529
530 inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
531 return (OpCmode << 8) | Val;
532 }
533 inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
534 return (ModImm >> 8) & 0x1f;
535 }
536 inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; }
537
538 /// decodeNEONModImm - Decode a NEON modified immediate value into the
539 /// element value and the element size in bits. (If the element size is
540 /// smaller than the vector, it is splatted into all the elements.)
541 inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
542 unsigned OpCmode = getNEONModImmOpCmode(ModImm);
543 unsigned Imm8 = getNEONModImmVal(ModImm);
544 uint64_t Val = 0;
545
546 if (OpCmode == 0xe) {
547 // 8-bit vector elements
548 Val = Imm8;
549 EltBits = 8;
550 } else if ((OpCmode & 0xc) == 0x8) {
551 // 16-bit vector elements
552 unsigned ByteNum = (OpCmode & 0x6) >> 1;
553 Val = Imm8 << (8 * ByteNum);
554 EltBits = 16;
555 } else if ((OpCmode & 0x8) == 0) {
556 // 32-bit vector elements, zero with one byte set
557 unsigned ByteNum = (OpCmode & 0x6) >> 1;
558 Val = Imm8 << (8 * ByteNum);
559 EltBits = 32;
560 } else if ((OpCmode & 0xe) == 0xc) {
561 // 32-bit vector elements, one byte with low bits set
562 unsigned ByteNum = 1 + (OpCmode & 0x1);
563 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
564 EltBits = 32;
565 } else if (OpCmode == 0x1e) {
566 // 64-bit vector elements
567 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
568 if ((ModImm >> ByteNum) & 1)
569 Val |= (uint64_t)0xff << (8 * ByteNum);
570 }
571 EltBits = 64;
572 } else {
573 llvm_unreachable("Unsupported NEON immediate");
574 }
575 return Val;
576 }
577
578 // Generic validation for single-byte immediate (0X00, 00X0, etc).
579 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
580 assert(Size >= 1 && Size <= 4 && "Invalid size");
581 unsigned count = 0;
582 for (unsigned i = 0; i < Size; ++i) {
583 if (Value & 0xff) count++;
584 Value >>= 8;
585 }
586 return count == 1;
587 }
588
589 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
590 inline bool isNEONi16splat(unsigned Value) {
591 if (Value > 0xffff)
592 return false;
593 // i16 value with set bits only in one byte X0 or 0X.
594 return Value == 0 || isNEONBytesplat(Value, 2);
595 }
596
597 // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR
598 inline unsigned encodeNEONi16splat(unsigned Value) {
599 assert(isNEONi16splat(Value) && "Invalid NEON splat value");
600 if (Value >= 0x100)
601 Value = (Value >> 8) | 0xa00;
602 else
603 Value |= 0x800;
604 return Value;
605 }
606
607 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
608 inline bool isNEONi32splat(unsigned Value) {
609 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
610 return Value == 0 || isNEONBytesplat(Value, 4);
611 }
612
613 /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR.
614 inline unsigned encodeNEONi32splat(unsigned Value) {
615 assert(isNEONi32splat(Value) && "Invalid NEON splat value");
616 if (Value >= 0x100 && Value <= 0xff00)
617 Value = (Value >> 8) | 0x200;
618 else if (Value > 0xffff && Value <= 0xff0000)
619 Value = (Value >> 16) | 0x400;
620 else if (Value > 0xffffff)
621 Value = (Value >> 24) | 0x600;
622 return Value;
623 }
624
625 //===--------------------------------------------------------------------===//
626 // Floating-point Immediates
627 //
628 inline float getFPImmFloat(unsigned Imm) {
629 // We expect an 8-bit binary encoding of a floating-point number here.
630 union {
631 uint32_t I;
632 float F;
633 } FPUnion;
634
635 uint8_t Sign = (Imm >> 7) & 0x1;
636 uint8_t Exp = (Imm >> 4) & 0x7;
637 uint8_t Mantissa = Imm & 0xf;
638
639 // 8-bit FP iEEEE Float Encoding
640 // abcd efgh aBbbbbbc defgh000 00000000 00000000
641 //
642 // where B = NOT(b);
643
644 FPUnion.I = 0;
645 FPUnion.I |= Sign << 31;
646 FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
647 FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
648 FPUnion.I |= (Exp & 0x3) << 23;
649 FPUnion.I |= Mantissa << 19;
650 return FPUnion.F;
651 }
652
653 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
654 /// floating-point value. If the value cannot be represented as an 8-bit
655 /// floating-point value, then return -1.
656 inline int getFP16Imm(const APInt &Imm) {
657 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
658 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
659 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
660
661 // We can handle 4 bits of mantissa.
662 // mantissa = (16+UInt(e:f:g:h))/16.
663 if (Mantissa & 0x3f)
664 return -1;
665 Mantissa >>= 6;
666
667 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
668 if (Exp < -3 || Exp > 4)
669 return -1;
670 Exp = ((Exp+3) & 0x7) ^ 4;
671
672 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
673 }
674
675 inline int getFP16Imm(const APFloat &FPImm) {
676 return getFP16Imm(FPImm.bitcastToAPInt());
677 }
678
679 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
680 /// floating-point value. If the value cannot be represented as an 8-bit
681 /// floating-point value, then return -1.
682 inline int getFP32Imm(const APInt &Imm) {
683 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
684 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
685 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
686
687 // We can handle 4 bits of mantissa.
688 // mantissa = (16+UInt(e:f:g:h))/16.
689 if (Mantissa & 0x7ffff)
690 return -1;
691 Mantissa >>= 19;
692 if ((Mantissa & 0xf) != Mantissa)
693 return -1;
694
695 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
696 if (Exp < -3 || Exp > 4)
697 return -1;
698 Exp = ((Exp+3) & 0x7) ^ 4;
699
700 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
701 }
702
703 inline int getFP32Imm(const APFloat &FPImm) {
704 return getFP32Imm(FPImm.bitcastToAPInt());
705 }
706
707 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
708 /// floating-point value. If the value cannot be represented as an 8-bit
709 /// floating-point value, then return -1.
710 inline int getFP64Imm(const APInt &Imm) {
711 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
712 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
713 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
714
715 // We can handle 4 bits of mantissa.
716 // mantissa = (16+UInt(e:f:g:h))/16.
717 if (Mantissa & 0xffffffffffffULL)
718 return -1;
719 Mantissa >>= 48;
720 if ((Mantissa & 0xf) != Mantissa)
721 return -1;
722
723 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
724 if (Exp < -3 || Exp > 4)
725 return -1;
726 Exp = ((Exp+3) & 0x7) ^ 4;
727
728 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
729 }
730
731 inline int getFP64Imm(const APFloat &FPImm) {
732 return getFP64Imm(FPImm.bitcastToAPInt());
733 }
734
735} // end namespace ARM_AM
736} // end namespace llvm
737
738#endif
739
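
For context on the flagged expression in isThumbImmShiftedVal (line 221 above), here is a hedged, standalone sketch of how a shift amount of 32 can be modeled: llvm::countTrailingZeros returns the bit width, 32, for a zero input under its default zero behaviour, and the analyzer conservatively assumes that value can reach the shift. The sketch uses no LLVM headers, assumes a compiler providing __builtin_ctz, and the names only mirror the report; the guard shown is purely illustrative and is not an upstream fix.

    // Plain C++ sketch; ctz32 models llvm::countTrailingZeros on a 32-bit value.
    static unsigned ctz32(unsigned x) {
      // countTrailingZeros(0) yields the bit width (32) under LLVM's default behaviour.
      return x == 0 ? 32u : static_cast<unsigned>(__builtin_ctz(x));
    }

    static unsigned getThumbImmValShiftSketch(unsigned Imm) {
      if ((Imm & ~255U) == 0) return 0;   // small immediates need no shift
      return ctz32(Imm);                  // Imm != 0 on this path, so this stays < 32
    }

    static bool isThumbImmShiftedValSketch(unsigned V) {
      unsigned Amt = getThumbImmValShiftSketch(V);
      if (Amt >= 32)                      // defensive guard; would silence the warning
        return false;
      return ((~255U << Amt) & V) == 0;   // the shift the analyzer flags at line 221
    }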