Bug Summary

File: lib/Target/ARM/ARMTargetTransformInfo.cpp
Warning: line 221, column 16
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
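For context, shifting a 32-bit unsigned value by 32 or more positions is undefined behaviour in C++ ([expr.shift]): the shift amount must be strictly less than the width of the promoted left operand. A minimal sketch of the flagged pattern, using a hypothetical safe_shl helper (not part of LLVM) to show one way an out-of-range shift amount can be guarded:

  #include <cassert>
  #include <cstdint>

  // Hypothetical helper: yields a defined result for out-of-range shift amounts
  // instead of the undefined behaviour that `v << amt` has when amt >= 32.
  inline uint32_t safe_shl(uint32_t v, unsigned amt) {
    return amt < 32 ? (v << amt) : 0;
  }

  int main() {
    assert(safe_shl(~255U, 8) == 0xFFFF0000u); // in-range shift behaves as usual
    assert(safe_shl(~255U, 32) == 0);          // shifting by 32 is well-defined here
    // (~255U << 32) without the guard is the kind of shift this report flags.
  }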

Annotated Source Code

/build/llvm-toolchain-snapshot-6.0~svn318882/lib/Target/ARM/ARMTargetTransformInfo.cpp

1//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "ARMTargetTransformInfo.h"
11#include "ARMSubtarget.h"
12#include "MCTargetDesc/ARMAddressingModes.h"
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/Analysis/LoopInfo.h"
16#include "llvm/CodeGen/CostTable.h"
17#include "llvm/CodeGen/ISDOpcodes.h"
18#include "llvm/CodeGen/MachineValueType.h"
19#include "llvm/CodeGen/ValueTypes.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/CallSite.h"
22#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/DerivedTypes.h"
24#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Instructions.h"
26#include "llvm/IR/Type.h"
27#include "llvm/MC/SubtargetFeature.h"
28#include "llvm/Support/Casting.h"
29#include "llvm/Target/TargetMachine.h"
30#include <algorithm>
31#include <cassert>
32#include <cstdint>
33#include <utility>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "armtti"
38
39bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
40 const Function *Callee) const {
41 const TargetMachine &TM = getTLI()->getTargetMachine();
42 const FeatureBitset &CallerBits =
43 TM.getSubtargetImpl(*Caller)->getFeatureBits();
44 const FeatureBitset &CalleeBits =
45 TM.getSubtargetImpl(*Callee)->getFeatureBits();
46
47 // To inline a callee, all features not in the whitelist must match exactly.
48 bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
49 (CalleeBits & ~InlineFeatureWhitelist);
50 // For features in the whitelist, the callee's features must be a subset of
51 // the callers'.
52 bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
53 (CalleeBits & InlineFeatureWhitelist);
54 return MatchExact && MatchSubset;
55}
56
57int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
58 assert(Ty->isIntegerTy());
59
60 unsigned Bits = Ty->getPrimitiveSizeInBits();
61 if (Bits == 0 || Imm.getActiveBits() >= 64)
8) Assuming 'Bits' is not equal to 0
9) Taking false branch
62 return 4;
63
64 int64_t SImmVal = Imm.getSExtValue();
65 uint64_t ZImmVal = Imm.getZExtValue();
66 if (!ST->isThumb()) {
10) Assuming the condition is false
11) Taking false branch
67 if ((SImmVal >= 0 && SImmVal < 65536) ||
68 (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
69 (ARM_AM::getSOImmVal(~ZImmVal) != -1))
70 return 1;
71 return ST->hasV6T2Ops() ? 2 : 3;
72 }
73 if (ST->isThumb2()) {
12) Assuming the condition is false
13) Taking false branch
74 if ((SImmVal >= 0 && SImmVal < 65536) ||
75 (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
76 (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
77 return 1;
78 return ST->hasV6T2Ops() ? 2 : 3;
79 }
80 // Thumb1.
81 if (SImmVal >= 0 && SImmVal < 256)
14) Assuming 'SImmVal' is < 0
82 return 1;
83 if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
15) Calling 'isThumbImmShiftedVal'
84 return 2;
85 // Load from constantpool.
86 return 3;
87}
88
89// Constants smaller than 256 fit in the immediate field of
90// Thumb1 instructions so we return a zero cost and 1 otherwise.
91int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
92 const APInt &Imm, Type *Ty) {
93 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
94 return 0;
95
96 return 1;
97}
98
99int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
100 Type *Ty) {
101 // Division by a constant can be turned into multiplication, but only if we
102 // know it's constant. So it's not so much that the immediate is cheap (it's
103 // not), but that the alternative is worse.
104 // FIXME: this is probably unneeded with GlobalISel.
105 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
1) Assuming 'Opcode' is not equal to SDiv
2) Assuming 'Opcode' is not equal to UDiv
106 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
3) Assuming 'Opcode' is not equal to SRem
4) Assuming 'Opcode' is not equal to URem
107 Idx == 1)
108 return 0;
109
110 if (Opcode == Instruction::And)
5) Assuming 'Opcode' is equal to And
6) Taking true branch
111 // Conversion to BIC is free, and means we can use ~Imm instead.
112 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
7) Calling 'ARMTTIImpl::getIntImmCost'
113
114 if (Opcode == Instruction::Add)
115 // Conversion to SUB is free, and means we can use -Imm instead.
116 return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
117
118 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
119 Ty->getIntegerBitWidth() == 32) {
120 int64_t NegImm = -Imm.getSExtValue();
121 if (ST->isThumb2() && NegImm < 1<<12)
122 // icmp X, #-C -> cmn X, #C
123 return 0;
124 if (ST->isThumb() && NegImm < 1<<8)
125 // icmp X, #-C -> adds X, #C
126 return 0;
127 }
128
129 return getIntImmCost(Imm, Ty);
130}
131
132int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
133 const Instruction *I) {
134 int ISD = TLI->InstructionOpcodeToISD(Opcode);
135 assert(ISD && "Invalid opcode");
136
137 // Single to/from double precision conversions.
138 static const CostTblEntry NEONFltDblTbl[] = {
139 // Vector fptrunc/fpext conversions.
140 { ISD::FP_ROUND, MVT::v2f64, 2 },
141 { ISD::FP_EXTEND, MVT::v2f32, 2 },
142 { ISD::FP_EXTEND, MVT::v4f32, 4 }
143 };
144
145 if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
146 ISD == ISD::FP_EXTEND)) {
147 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
148 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
149 return LT.first * Entry->Cost;
150 }
151
152 EVT SrcTy = TLI->getValueType(DL, Src);
153 EVT DstTy = TLI->getValueType(DL, Dst);
154
155 if (!SrcTy.isSimple() || !DstTy.isSimple())
156 return BaseT::getCastInstrCost(Opcode, Dst, Src);
157
158 // Some arithmetic, load and store operations have specific instructions
159 // to cast up/down their types automatically at no extra cost.
160 // TODO: Get these tables to know at least what the related operations are.
161 static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
162 { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
163 { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
164 { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
165 { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
166 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
167 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
168
169 // The number of vmovl instructions for the extension.
170 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
171 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
172 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
173 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
174 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
175 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
176 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
177 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
178 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
179 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
180
181 // Operations that we legalize using splitting.
182 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
183 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
184
185 // Vector float <-> i32 conversions.
186 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
187 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
188
189 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
190 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
191 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
192 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
193 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
194 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
195 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
196 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
197 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
198 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
199 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
200 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
201 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
202 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
203 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
204 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
205 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
206 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
207 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
208 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
209
210 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
211 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
212 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
213 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
214 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
215 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
216
217 // Vector double <-> i32 conversions.
218 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
219 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
220
221 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
222 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
223 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
224 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
225 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
226 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
227
228 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
229 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
230 { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
231 { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
232 { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
233 { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
234 };
235
236 if (SrcTy.isVector() && ST->hasNEON()) {
237 if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
238 DstTy.getSimpleVT(),
239 SrcTy.getSimpleVT()))
240 return Entry->Cost;
241 }
242
243 // Scalar float to integer conversions.
244 static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
245 { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
246 { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
247 { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
248 { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
249 { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
250 { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
251 { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
252 { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
253 { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
254 { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
255 { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
256 { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
257 { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
258 { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
259 { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
260 { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
261 { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
262 { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
263 { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
264 { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
265 };
266 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
267 if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
268 DstTy.getSimpleVT(),
269 SrcTy.getSimpleVT()))
270 return Entry->Cost;
271 }
272
273 // Scalar integer to float conversions.
274 static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
275 { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
276 { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
277 { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
278 { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
279 { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
280 { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
281 { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
282 { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
283 { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
284 { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
285 { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
286 { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
287 { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
288 { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
289 { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
290 { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
291 { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
292 { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
293 { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
294 { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
295 };
296
297 if (SrcTy.isInteger() && ST->hasNEON()) {
298 if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
299 ISD, DstTy.getSimpleVT(),
300 SrcTy.getSimpleVT()))
301 return Entry->Cost;
302 }
303
304 // Scalar integer conversion costs.
305 static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
306 // i16 -> i64 requires two dependent operations.
307 { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
308
309 // Truncates on i64 are assumed to be free.
310 { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
311 { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
312 { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
313 { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
314 };
315
316 if (SrcTy.isInteger()) {
317 if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
318 DstTy.getSimpleVT(),
319 SrcTy.getSimpleVT()))
320 return Entry->Cost;
321 }
322
323 return BaseT::getCastInstrCost(Opcode, Dst, Src);
324}
325
326int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
327 unsigned Index) {
328 // Penalize inserting into a D-subregister. We end up with a three times
329 // lower estimated throughput on swift.
330 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
331 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
332 return 3;
333
334 if ((Opcode == Instruction::InsertElement ||
335 Opcode == Instruction::ExtractElement)) {
336 // Cross-class copies are expensive on many microarchitectures,
337 // so assume they are expensive by default.
338 if (ValTy->getVectorElementType()->isIntegerTy())
339 return 3;
340
341 // Even if it's not a cross class copy, this likely leads to mixing
342 // of NEON and VFP code and should be therefore penalized.
343 if (ValTy->isVectorTy() &&
344 ValTy->getScalarSizeInBits() <= 32)
345 return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
346 }
347
348 return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
349}
350
351int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
352 const Instruction *I) {
353 int ISD = TLI->InstructionOpcodeToISD(Opcode);
354 // On NEON a vector select gets lowered to vbsl.
355 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
356 // Lowering of some vector selects is currently far from perfect.
357 static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
358 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
359 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
360 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
361 };
362
363 EVT SelCondTy = TLI->getValueType(DL, CondTy);
364 EVT SelValTy = TLI->getValueType(DL, ValTy);
365 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
366 if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
367 SelCondTy.getSimpleVT(),
368 SelValTy.getSimpleVT()))
369 return Entry->Cost;
370 }
371
372 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
373 return LT.first;
374 }
375
376 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
377}
378
379int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
380 const SCEV *Ptr) {
381 // Address computations in vectorized code with non-consecutive addresses will
382 // likely result in more instructions compared to scalar code where the
383 // computation can more often be merged into the index mode. The resulting
384 // extra micro-ops can significantly decrease throughput.
385 unsigned NumVectorInstToHideOverhead = 10;
386 int MaxMergeDistance = 64;
387
388 if (Ty->isVectorTy() && SE &&
389 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
390 return NumVectorInstToHideOverhead;
391
392 // In many cases the address computation is not merged into the instruction
393 // addressing mode.
394 return 1;
395}
396
397int ARMTTIImpl::getFPOpCost(Type *Ty) {
398 // Use similar logic that's in ARMISelLowering:
399 // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
400 // to VFP.
401
402 if (ST->hasVFP2() && !ST->isThumb1Only()) {
403 if (Ty->isFloatTy()) {
404 return TargetTransformInfo::TCC_Basic;
405 }
406
407 if (Ty->isDoubleTy()) {
408 return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
409 TargetTransformInfo::TCC_Basic;
410 }
411 }
412
413 return TargetTransformInfo::TCC_Expensive;
414}
415
416int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
417 Type *SubTp) {
418 // We only handle costs of reverse and alternate shuffles for now.
419 if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
420 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
421
422 if (Kind == TTI::SK_Reverse) {
423 static const CostTblEntry NEONShuffleTbl[] = {
424 // Reverse shuffle cost one instruction if we are shuffling within a
425 // double word (vrev) or two if we shuffle a quad word (vrev, vext).
426 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
427 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
428 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
429 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
430
431 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
432 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
433 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
434 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
435
436 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
437
438 if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
439 LT.second))
440 return LT.first * Entry->Cost;
441
442 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
443 }
444 if (Kind == TTI::SK_Alternate) {
445 static const CostTblEntry NEONAltShuffleTbl[] = {
446 // Alt shuffle cost table for ARM. Cost is the number of instructions
447 // required to create the shuffled vector.
448
449 {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
450 {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
451 {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
452 {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
453
454 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
455 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
456 {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
457
458 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
459
460 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
461
462 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
463 if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
464 ISD::VECTOR_SHUFFLE, LT.second))
465 return LT.first * Entry->Cost;
466 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
467 }
468 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
469}
470
471int ARMTTIImpl::getArithmeticInstrCost(
472 unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
473 TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
474 TTI::OperandValueProperties Opd2PropInfo,
475 ArrayRef<const Value *> Args) {
476 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
477 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
478
479 const unsigned FunctionCallDivCost = 20;
480 const unsigned ReciprocalDivCost = 10;
481 static const CostTblEntry CostTbl[] = {
482 // Division.
483 // These costs are somewhat random. Choose a cost of 20 to indicate that
484 // vectorizing division (added function call) is going to be very expensive.
485 // Double registers types.
486 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
487 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
488 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
489 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
490 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
491 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
492 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
493 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
494 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
495 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
496 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
497 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
498 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
499 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
500 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
501 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
502 // Quad register types.
503 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
504 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
505 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
506 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
507 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
508 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
509 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
510 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
511 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
512 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
513 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
514 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
515 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
516 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
517 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
518 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
519 // Multiplication.
520 };
521
522 if (ST->hasNEON())
523 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
524 return LT.first * Entry->Cost;
525
526 int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
527 Opd1PropInfo, Opd2PropInfo);
528
529 // This is somewhat of a hack. The problem that we are facing is that SROA
530 // creates a sequence of shift, and, or instructions to construct values.
531 // These sequences are recognized by the ISel and have zero-cost. Not so for
532 // the vectorized code. Because we have support for v2i64 but not i64 those
533 // sequences look particularly beneficial to vectorize.
534 // To work around this we increase the cost of v2i64 operations to make them
535 // seem less beneficial.
536 if (LT.second == MVT::v2i64 &&
537 Op2Info == TargetTransformInfo::OK_UniformConstantValue)
538 Cost += 4;
539
540 return Cost;
541}
542
543int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
544 unsigned AddressSpace, const Instruction *I) {
545 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
546
547 if (Src->isVectorTy() && Alignment != 16 &&
548 Src->getVectorElementType()->isDoubleTy()) {
549 // Unaligned loads/stores are extremely inefficient.
550 // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
551 return LT.first * 4;
552 }
553 return LT.first;
554}
555
556int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
557 unsigned Factor,
558 ArrayRef<unsigned> Indices,
559 unsigned Alignment,
560 unsigned AddressSpace) {
561 assert(Factor >= 2 && "Invalid interleave factor");
562 assert(isa<VectorType>(VecTy) && "Expect a vector type");
563
564 // vldN/vstN doesn't support vector types of i64/f64 element.
565 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
566
567 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
568 unsigned NumElts = VecTy->getVectorNumElements();
569 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
570
571 // vldN/vstN only support legal vector types of size 64 or 128 in bits.
572 // Accesses having vector types that are a multiple of 128 bits can be
573 // matched to more than one vldN/vstN instruction.
574 if (NumElts % Factor == 0 &&
575 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
576 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
577 }
578
579 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
580 Alignment, AddressSpace);
581}
582
583void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
584 TTI::UnrollingPreferences &UP) {
585 // Only currently enable these preferences for M-Class cores.
586 if (!ST->isMClass())
587 return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
588
589 // Disable loop unrolling for Oz and Os.
590 UP.OptSizeThreshold = 0;
591 UP.PartialOptSizeThreshold = 0;
592 if (L->getHeader()->getParent()->optForSize())
593 return;
594
595 // Only enable on Thumb-2 targets.
596 if (!ST->isThumb2())
597 return;
598
599 SmallVector<BasicBlock*, 4> ExitingBlocks;
600 L->getExitingBlocks(ExitingBlocks);
601 DEBUG(dbgs() << "Loop has:\n"
602 << "Blocks: " << L->getNumBlocks() << "\n"
603 << "Exit blocks: " << ExitingBlocks.size() << "\n");
604
605 // Only allow another exit other than the latch. This acts as an early exit
606 // as it mirrors the profitability calculation of the runtime unroller.
607 if (ExitingBlocks.size() > 2)
608 return;
609
610 // Limit the CFG of the loop body for targets with a branch predictor.
611 // Allowing 4 blocks permits if-then-else diamonds in the body.
612 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
613 return;
614
615 // Scan the loop: don't unroll loops with calls as this could prevent
616 // inlining.
617 unsigned Cost = 0;
618 for (auto *BB : L->getBlocks()) {
619 for (auto &I : *BB) {
620 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
621 ImmutableCallSite CS(&I);
622 if (const Function *F = CS.getCalledFunction()) {
623 if (!isLoweredToCall(F))
624 continue;
625 }
626 return;
627 }
628 SmallVector<const Value*, 4> Operands(I.value_op_begin(),
629 I.value_op_end());
630 Cost += getUserCost(&I, Operands);
631 }
632 }
633
634 DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
635
636 UP.Partial = true;
637 UP.Runtime = true;
638 UP.UnrollRemainder = true;
639 UP.DefaultUnrollRuntimeCount = 4;
640
641 // Force unrolling small loops can be very useful because of the branch
642 // taken cost of the backedge.
643 if (Cost < 12)
644 UP.Force = true;
645}

/build/llvm-toolchain-snapshot-6.0~svn318882/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h

1//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the ARM addressing mode implementation stuff.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
15#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/APInt.h"
19#include "llvm/Support/ErrorHandling.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22
23namespace llvm {
24
25/// ARM_AM - ARM Addressing Mode Stuff
26namespace ARM_AM {
27 enum ShiftOpc {
28 no_shift = 0,
29 asr,
30 lsl,
31 lsr,
32 ror,
33 rrx
34 };
35
36 enum AddrOpc {
37 sub = 0,
38 add
39 };
40
41 inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
42
43 inline const char *getShiftOpcStr(ShiftOpc Op) {
44 switch (Op) {
45 default: llvm_unreachable("Unknown shift opc!");
46 case ARM_AM::asr: return "asr";
47 case ARM_AM::lsl: return "lsl";
48 case ARM_AM::lsr: return "lsr";
49 case ARM_AM::ror: return "ror";
50 case ARM_AM::rrx: return "rrx";
51 }
52 }
53
54 inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
55 switch (Op) {
56 default: llvm_unreachable("Unknown shift opc!");
57 case ARM_AM::asr: return 2;
58 case ARM_AM::lsl: return 0;
59 case ARM_AM::lsr: return 1;
60 case ARM_AM::ror: return 3;
61 }
62 }
63
64 enum AMSubMode {
65 bad_am_submode = 0,
66 ia,
67 ib,
68 da,
69 db
70 };
71
72 inline const char *getAMSubModeStr(AMSubMode Mode) {
73 switch (Mode) {
74 default: llvm_unreachable("Unknown addressing sub-mode!");
75 case ARM_AM::ia: return "ia";
76 case ARM_AM::ib: return "ib";
77 case ARM_AM::da: return "da";
78 case ARM_AM::db: return "db";
79 }
80 }
81
82 /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
83 ///
84 inline unsigned rotr32(unsigned Val, unsigned Amt) {
85 assert(Amt < 32 && "Invalid rotate amount");
86 return (Val >> Amt) | (Val << ((32-Amt)&31));
87 }
88
89 /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
90 ///
91 inline unsigned rotl32(unsigned Val, unsigned Amt) {
92 assert(Amt < 32 && "Invalid rotate amount");
93 return (Val << Amt) | (Val >> ((32-Amt)&31));
94 }
95
96 //===--------------------------------------------------------------------===//
97 // Addressing Mode #1: shift_operand with registers
98 //===--------------------------------------------------------------------===//
99 //
100 // This 'addressing mode' is used for arithmetic instructions. It can
101 // represent things like:
102 // reg
103 // reg [asr|lsl|lsr|ror|rrx] reg
104 // reg [asr|lsl|lsr|ror|rrx] imm
105 //
106 // This is stored three operands [rega, regb, opc]. The first is the base
107 // reg, the second is the shift amount (or reg0 if not present or imm). The
108 // third operand encodes the shift opcode and the imm if a reg isn't present.
109 //
110 inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
111 return ShOp | (Imm << 3);
112 }
113 inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; }
114 inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); }
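As a small illustration of this packing scheme, encoding a shift operand and reading it back round-trips through the three helpers above (a sketch only; the shift opcode and immediate are arbitrary, and the snippet assumes this header and <cassert> are included):

  // Pack "lsl #5": the shift opcode goes in bits 0-2, the immediate in bits 3 and up.
  unsigned Packed = llvm::ARM_AM::getSORegOpc(llvm::ARM_AM::lsl, 5);
  assert(llvm::ARM_AM::getSORegShOp(Packed) == llvm::ARM_AM::lsl);
  assert(llvm::ARM_AM::getSORegOffset(Packed) == 5);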
115
116 /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
117 /// the 8-bit imm value.
118 inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; }
119 /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
120 /// the rotate amount.
121 inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; }
122
123 /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
124 /// computing the rotate amount to use. If this immediate value cannot be
125 /// handled with a single shifter-op, determine a good rotate amount that will
126 /// take a maximal chunk of bits out of the immediate.
127 inline unsigned getSOImmValRotate(unsigned Imm) {
128 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
129 // of zero.
130 if ((Imm & ~255U) == 0) return 0;
131
132 // Use CTZ to compute the rotate amount.
133 unsigned TZ = countTrailingZeros(Imm);
134
135 // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
136 // not 9.
137 unsigned RotAmt = TZ & ~1;
138
139 // If we can handle this spread, return it.
140 if ((rotr32(Imm, RotAmt) & ~255U) == 0)
141 return (32-RotAmt)&31; // HW rotates right, not left.
142
143 // For values like 0xF000000F, we should ignore the low 6 bits, then
144 // retry the hunt.
145 if (Imm & 63U) {
146 unsigned TZ2 = countTrailingZeros(Imm & ~63U);
147 unsigned RotAmt2 = TZ2 & ~1;
148 if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
149 return (32-RotAmt2)&31; // HW rotates right, not left.
150 }
151
152 // Otherwise, we have no way to cover this span of bits with a single
153 // shifter_op immediate. Return a chunk of bits that will be useful to
154 // handle.
155 return (32-RotAmt)&31; // HW rotates right, not left.
156 }
157
158 /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
159 /// into a shifter_operand immediate operand, return the 12-bit encoding for
160 /// it. If not, return -1.
161 inline int getSOImmVal(unsigned Arg) {
162 // 8-bit (or less) immediates are trivially shifter_operands with a rotate
163 // of zero.
164 if ((Arg & ~255U) == 0) return Arg;
165
166 unsigned RotAmt = getSOImmValRotate(Arg);
167
168 // If this cannot be handled with a single shifter_op, bail out.
169 if (rotr32(~255U, RotAmt) & Arg)
170 return -1;
171
172 // Encode this correctly.
173 return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
174 }
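A brief usage sketch (the values are chosen for illustration and assume the llvm namespace is in scope): 0x400 is a single set bit, so it can be expressed as an 8-bit immediate plus a rotation, while 0x101 spans nine bits and cannot be encoded:

  assert(ARM_AM::getSOImmVal(255U) == 255);     // fits directly, rotate of zero
  assert(ARM_AM::getSOImmVal(0x400U) == 0xB01); // imm8 = 1, rotate-right by 22 (rot field = 11)
  assert(ARM_AM::getSOImmVal(0x101U) == -1);    // bits 0 and 8 cannot share one 8-bit window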
175
176 /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
177 /// or'ing together two SOImmVal's.
178 inline bool isSOImmTwoPartVal(unsigned V) {
179 // If this can be handled with a single shifter_op, bail out.
180 V = rotr32(~255U, getSOImmValRotate(V)) & V;
181 if (V == 0)
182 return false;
183
184 // If this can be handled with two shifter_op's, accept.
185 V = rotr32(~255U, getSOImmValRotate(V)) & V;
186 return V == 0;
187 }
188
189 /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
190 /// return the first chunk of it.
191 inline unsigned getSOImmTwoPartFirst(unsigned V) {
192 return rotr32(255U, getSOImmValRotate(V)) & V;
193 }
194
195 /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
196 /// return the second chunk of it.
197 inline unsigned getSOImmTwoPartSecond(unsigned V) {
198 // Mask out the first hunk.
199 V = rotr32(~255U, getSOImmValRotate(V)) & V;
200
201 // Take what's left.
202 assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
203 return V;
204 }
205
206 /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
207 /// by a left shift. Returns the shift amount to use.
208 inline unsigned getThumbImmValShift(unsigned Imm) {
209 // 8-bit (or less) immediates are trivially immediate operand with a shift
210 // of zero.
211 if ((Imm & ~255U) == 0) return 0;
212
213 // Use CTZ to compute the shift amount.
214 return countTrailingZeros(Imm);
215 }
216
217 /// isThumbImmShiftedVal - Return true if the specified value can be obtained
218 /// by left shifting a 8-bit immediate.
219 inline bool isThumbImmShiftedVal(unsigned V) {
220 // If this can be handled with
221 V = (~255U << getThumbImmValShift(V)) & V;
16) The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'unsigned int'
222 return V == 0;
223 }
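On concrete inputs the shift amount above should stay in range: getThumbImmValShift returns 0 whenever bits 8-31 of V are all clear (including V == 0), and otherwise countTrailingZeros sees a value with a set bit in bits 8-31 and returns at most 31. The analyzer, however, reports that a shift amount of 32 is feasible on this path (step 16), and shifting an unsigned int by 32 is undefined behaviour. A sketch of one way to state the invariant explicitly (the added assert is illustrative, not a proposed LLVM change):

  inline bool isThumbImmShiftedVal(unsigned V) {
    unsigned Shift = getThumbImmValShift(V);
    assert(Shift < 32 && "shift amount must be smaller than the 32-bit width");
    V = (~255U << Shift) & V;
    return V == 0;
  }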
224
225 /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
226 /// by a left shift. Returns the shift amount to use.
227 inline unsigned getThumbImm16ValShift(unsigned Imm) {
228 // 16-bit (or less) immediates are trivially immediate operand with a shift
229 // of zero.
230 if ((Imm & ~65535U) == 0) return 0;
231
232 // Use CTZ to compute the shift amount.
233 return countTrailingZeros(Imm);
234 }
235
236 /// isThumbImm16ShiftedVal - Return true if the specified value can be
237 /// obtained by left shifting a 16-bit immediate.
238 inline bool isThumbImm16ShiftedVal(unsigned V) {
239 // If this can be handled with
240 V = (~65535U << getThumbImm16ValShift(V)) & V;
241 return V == 0;
242 }
243
244 /// getThumbImmNonShiftedVal - If V is a value that satisfies
245 /// isThumbImmShiftedVal, return the non-shifted value.
246 inline unsigned getThumbImmNonShiftedVal(unsigned V) {
247 return V >> getThumbImmValShift(V);
248 }
249
250
251 /// getT2SOImmValSplat - Return the 12-bit encoded representation
252 /// if the specified value can be obtained by splatting the low 8 bits
253 /// into every other byte or every byte of a 32-bit value. i.e.,
254 /// 00000000 00000000 00000000 abcdefgh control = 0
255 /// 00000000 abcdefgh 00000000 abcdefgh control = 1
256 /// abcdefgh 00000000 abcdefgh 00000000 control = 2
257 /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3
258 /// Return -1 if none of the above apply.
259 /// See ARM Reference Manual A6.3.2.
260 inline int getT2SOImmValSplatVal(unsigned V) {
261 unsigned u, Vs, Imm;
262 // control = 0
263 if ((V & 0xffffff00) == 0)
264 return V;
265
266 // If the value is zeroes in the first byte, just shift those off
267 Vs = ((V & 0xff) == 0) ? V >> 8 : V;
268 // Any passing value only has 8 bits of payload, splatted across the word
269 Imm = Vs & 0xff;
270 // Likewise, any passing values have the payload splatted into the 3rd byte
271 u = Imm | (Imm << 16);
272
273 // control = 1 or 2
274 if (Vs == u)
275 return (((Vs == V) ? 1 : 2) << 8) | Imm;
276
277 // control = 3
278 if (Vs == (u | (u << 8)))
279 return (3 << 8) | Imm;
280
281 return -1;
282 }
283
284 /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the
285 /// specified value is a rotated 8-bit value. Return -1 if no rotation
286 /// encoding is possible.
287 /// See ARM Reference Manual A6.3.2.
288 inline int getT2SOImmValRotateVal(unsigned V) {
289 unsigned RotAmt = countLeadingZeros(V);
290 if (RotAmt >= 24)
291 return -1;
292
293 // If 'Arg' can be handled with a single shifter_op return the value.
294 if ((rotr32(0xff000000U, RotAmt) & V) == V)
295 return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
296
297 return -1;
298 }
299
300 /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
301 /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit
302 /// encoding for it. If not, return -1.
303 /// See ARM Reference Manual A6.3.2.
304 inline int getT2SOImmVal(unsigned Arg) {
305 // If 'Arg' is an 8-bit splat, then get the encoded value.
306 int Splat = getT2SOImmValSplatVal(Arg);
307 if (Splat != -1)
308 return Splat;
309
310 // If 'Arg' can be handled with a single shifter_op return the value.
311 int Rot = getT2SOImmValRotateVal(Arg);
312 if (Rot != -1)
313 return Rot;
314
315 return -1;
316 }
317
318 inline unsigned getT2SOImmValRotate(unsigned V) {
319 if ((V & ~255U) == 0) return 0;
320 // Use CTZ to compute the rotate amount.
321 unsigned RotAmt = countTrailingZeros(V);
322 return (32 - RotAmt) & 31;
323 }
324
325 inline bool isT2SOImmTwoPartVal(unsigned Imm) {
326 unsigned V = Imm;
327 // Passing values can be any combination of splat values and shifter
328 // values. If this can be handled with a single shifter or splat, bail
329 // out. Those should be handled directly, not with a two-part val.
330 if (getT2SOImmValSplatVal(V) != -1)
331 return false;
332 V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
333 if (V == 0)
334 return false;
335
336 // If this can be handled as an immediate, accept.
337 if (getT2SOImmVal(V) != -1) return true;
338
339 // Likewise, try masking out a splat value first.
340 V = Imm;
341 if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1)
342 V &= ~0xff00ff00U;
343 else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1)
344 V &= ~0x00ff00ffU;
345 // If what's left can be handled as an immediate, accept.
346 if (getT2SOImmVal(V) != -1) return true;
347
348 // Otherwise, do not accept.
349 return false;
350 }
351
352 inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
353 assert (isT2SOImmTwoPartVal(Imm) &&
354 "Immedate cannot be encoded as two part immediate!");
355 // Try a shifter operand as one part
356 unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
357 // If the rest is encodable as an immediate, then return it.
358 if (getT2SOImmVal(V) != -1) return V;
359
360 // Try masking out a splat value first.
361 if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
362 return Imm & 0xff00ff00U;
363
364 // The other splat is all that's left as an option.
365 assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
366 return Imm & 0x00ff00ffU;
367 }
368
369 inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
370 // Mask out the first hunk
371 Imm ^= getT2SOImmTwoPartFirst(Imm);
372 // Return what's left
373 assert (getT2SOImmVal(Imm) != -1 &&
374 "Unable to encode second part of T2 two part SO immediate");
375 return Imm;
376 }
377
378
379 //===--------------------------------------------------------------------===//
380 // Addressing Mode #2
381 //===--------------------------------------------------------------------===//
382 //
383 // This is used for most simple load/store instructions.
384 //
385 // addrmode2 := reg +/- reg shop imm
386 // addrmode2 := reg +/- imm12
387 //
388 // The first operand is always a Reg. The second operand is a reg if in
389 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
390 // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
391 // fourth operand 16-17 encodes the index mode.
392 //
393 // If this addressing mode is a frame index (before prolog/epilog insertion
394 // and code rewriting), this operand will have the form: FI#, reg0, <offs>
395 // with no shift amount for the frame offset.
396 //
397 inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
398 unsigned IdxMode = 0) {
399 assert(Imm12 < (1 << 12) && "Imm too large!");
400 bool isSub = Opc == sub;
401 return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
402 }
403 inline unsigned getAM2Offset(unsigned AM2Opc) {
404 return AM2Opc & ((1 << 12)-1);
405 }
406 inline AddrOpc getAM2Op(unsigned AM2Opc) {
407 return ((AM2Opc >> 12) & 1) ? sub : add;
408 }
409 inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
410 return (ShiftOpc)((AM2Opc >> 13) & 7);
411 }
412 inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); }
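A small round-trip sketch of this encoding (the field values are arbitrary): packing a subtracting 12-bit offset of 100 with an lsl shift and index mode 1, then reading each field back with the accessors above:

  unsigned AM2 = ARM_AM::getAM2Opc(ARM_AM::sub, 100, ARM_AM::lsl, 1);
  assert(ARM_AM::getAM2Offset(AM2) == 100);           // bits 0-11
  assert(ARM_AM::getAM2Op(AM2) == ARM_AM::sub);       // bit 12
  assert(ARM_AM::getAM2ShiftOpc(AM2) == ARM_AM::lsl); // bits 13-15
  assert(ARM_AM::getAM2IdxMode(AM2) == 1);            // bits 16-17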
413
414 //===--------------------------------------------------------------------===//
415 // Addressing Mode #3
416 //===--------------------------------------------------------------------===//
417 //
418 // This is used for sign-extending loads, and load/store-pair instructions.
419 //
420 // addrmode3 := reg +/- reg
421 // addrmode3 := reg +/- imm8
422 //
423 // The first operand is always a Reg. The second operand is a reg if in
424 // reg/reg form, otherwise it's reg#0. The third field encodes the operation
425 // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the
426 // index mode.
427
428 /// getAM3Opc - This function encodes the addrmode3 opc field.
429 inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
430 unsigned IdxMode = 0) {
431 bool isSub = Opc == sub;
432 return ((int)isSub << 8) | Offset | (IdxMode << 9);
433 }
434 inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; }
435 inline AddrOpc getAM3Op(unsigned AM3Opc) {
436 return ((AM3Opc >> 8) & 1) ? sub : add;
437 }
438 inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); }
439
440 //===--------------------------------------------------------------------===//
441 // Addressing Mode #4
442 //===--------------------------------------------------------------------===//
443 //
444 // This is used for load / store multiple instructions.
445 //
446 // addrmode4 := reg, <mode>
447 //
448 // The four modes are:
449 // IA - Increment after
450 // IB - Increment before
451 // DA - Decrement after
452 // DB - Decrement before
453 // For VFP instructions, only the IA and DB modes are valid.
454
455 inline AMSubMode getAM4SubMode(unsigned Mode) {
456 return (AMSubMode)(Mode & 0x7);
457 }
458
459 inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; }
460
461 //===--------------------------------------------------------------------===//
462 // Addressing Mode #5
463 //===--------------------------------------------------------------------===//
464 //
465 // This is used for coprocessor instructions, such as FP load/stores.
466 //
467 // addrmode5 := reg +/- imm8*4
468 //
469 // The first operand is always a Reg. The second operand encodes the
470 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
471
472 /// getAM5Opc - This function encodes the addrmode5 opc field.
473 inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
474 bool isSub = Opc == sub;
475 return ((int)isSub << 8) | Offset;
476 }
477 inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
478 inline AddrOpc getAM5Op(unsigned AM5Opc) {
479 return ((AM5Opc >> 8) & 1) ? sub : add;
480 }
481
482 //===--------------------------------------------------------------------===//
483 // Addressing Mode #5 FP16
484 //===--------------------------------------------------------------------===//
485 //
486 // This is used for coprocessor instructions, such as 16-bit FP load/stores.
487 //
488 // addrmode5fp16 := reg +/- imm8*2
489 //
490 // The first operand is always a Reg. The second operand encodes the
491 // operation (add or subtract) in bit 8 and the immediate in bits 0-7.
492
493 /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field.
494 inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
495 bool isSub = Opc == sub;
496 return ((int)isSub << 8) | Offset;
497 }
498 inline unsigned char getAM5FP16Offset(unsigned AM5Opc) {
499 return AM5Opc & 0xFF;
500 }
501 inline AddrOpc getAM5FP16Op(unsigned AM5Opc) {
502 return ((AM5Opc >> 8) & 1) ? sub : add;
503 }
504
505 //===--------------------------------------------------------------------===//
506 // Addressing Mode #6
507 //===--------------------------------------------------------------------===//
508 //
509 // This is used for NEON load / store instructions.
510 //
511 // addrmode6 := reg with optional alignment
512 //
513 // This is stored in two operands [regaddr, align]. The first is the
514 // address register. The second operand is the value of the alignment
515 // specifier in bytes or zero if no explicit alignment.
516 // Valid alignments depend on the specific instruction.
517
518 //===--------------------------------------------------------------------===//
519 // NEON Modified Immediates
520 //===--------------------------------------------------------------------===//
521 //
522 // Several NEON instructions (e.g., VMOV) take a "modified immediate"
523 // vector operand, where a small immediate encoded in the instruction
524 // specifies a full NEON vector value. These modified immediates are
525 // represented here as encoded integers. The low 8 bits hold the immediate
526 // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
527 // the "Cmode" field of the instruction. The interfaces below treat the
528 // Op and Cmode values as a single 5-bit value.
529
530 inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
531 return (OpCmode << 8) | Val;
532 }
533 inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
534 return (ModImm >> 8) & 0x1f;
535 }
536 inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; }
537
538 /// decodeNEONModImm - Decode a NEON modified immediate value into the
539 /// element value and the element size in bits. (If the element size is
540 /// smaller than the vector, it is splatted into all the elements.)
541 inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
542 unsigned OpCmode = getNEONModImmOpCmode(ModImm);
543 unsigned Imm8 = getNEONModImmVal(ModImm);
544 uint64_t Val = 0;
545
546 if (OpCmode == 0xe) {
547 // 8-bit vector elements
548 Val = Imm8;
549 EltBits = 8;
550 } else if ((OpCmode & 0xc) == 0x8) {
551 // 16-bit vector elements
552 unsigned ByteNum = (OpCmode & 0x6) >> 1;
553 Val = Imm8 << (8 * ByteNum);
554 EltBits = 16;
555 } else if ((OpCmode & 0x8) == 0) {
556 // 32-bit vector elements, zero with one byte set
557 unsigned ByteNum = (OpCmode & 0x6) >> 1;
558 Val = Imm8 << (8 * ByteNum);
559 EltBits = 32;
560 } else if ((OpCmode & 0xe) == 0xc) {
561 // 32-bit vector elements, one byte with low bits set
562 unsigned ByteNum = 1 + (OpCmode & 0x1);
563 Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
564 EltBits = 32;
565 } else if (OpCmode == 0x1e) {
566 // 64-bit vector elements
567 for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
568 if ((ModImm >> ByteNum) & 1)
569 Val |= (uint64_t)0xff << (8 * ByteNum);
570 }
571 EltBits = 64;
572 } else {
573 llvm_unreachable("Unsupported NEON immediate");
574 }
575 return Val;
576 }
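A small decoding sketch using the helpers above (the payload value is arbitrary): Op/Cmode 0xe selects 8-bit elements, so the 8-bit payload comes back unchanged and EltBits is set to 8:

  unsigned EltBits = 0;
  unsigned ModImm = ARM_AM::createNEONModImm(0xe, 0xAB);
  uint64_t Val = ARM_AM::decodeNEONModImm(ModImm, EltBits);
  assert(Val == 0xAB && EltBits == 8);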
577
578 // Generic validation for single-byte immediate (0X00, 00X0, etc).
579 inline bool isNEONBytesplat(unsigned Value, unsigned Size) {
580 assert(Size >= 1 && Size <= 4 && "Invalid size");
581 unsigned count = 0;
582 for (unsigned i = 0; i < Size; ++i) {
583 if (Value & 0xff) count++;
584 Value >>= 8;
585 }
586 return count == 1;
587 }
588
589 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
590 inline bool isNEONi16splat(unsigned Value) {
591 if (Value > 0xffff)
592 return false;
593 // i16 value with set bits only in one byte X0 or 0X.
594 return Value == 0 || isNEONBytesplat(Value, 2);
595 }
596
597 // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR
598 inline unsigned encodeNEONi16splat(unsigned Value) {
599 assert(isNEONi16splat(Value) && "Invalid NEON splat value");
600 if (Value >= 0x100)
601 Value = (Value >> 8) | 0xa00;
602 else
603 Value |= 0x800;
604 return Value;
605 }
606
607 /// Checks if Value is a correct immediate for instructions like VBIC/VORR.
608 inline bool isNEONi32splat(unsigned Value) {
609 // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
610 return Value == 0 || isNEONBytesplat(Value, 4);
611 }
612
613 /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR.
614 inline unsigned encodeNEONi32splat(unsigned Value) {
615 assert(isNEONi32splat(Value) && "Invalid NEON splat value");
616 if (Value >= 0x100 && Value <= 0xff00)
617 Value = (Value >> 8) | 0x200;
618 else if (Value > 0xffff && Value <= 0xff0000)
619 Value = (Value >> 16) | 0x400;
620 else if (Value > 0xffffff)
621 Value = (Value >> 24) | 0x600;
622 return Value;
623 }
624
625 //===--------------------------------------------------------------------===//
626 // Floating-point Immediates
627 //
628 inline float getFPImmFloat(unsigned Imm) {
629 // We expect an 8-bit binary encoding of a floating-point number here.
630 union {
631 uint32_t I;
632 float F;
633 } FPUnion;
634
635 uint8_t Sign = (Imm >> 7) & 0x1;
636 uint8_t Exp = (Imm >> 4) & 0x7;
637 uint8_t Mantissa = Imm & 0xf;
638
639 // 8-bit FP IEEE Float Encoding
640 // abcd efgh aBbbbbbc defgh000 00000000 00000000
641 //
642 // where B = NOT(b);
643
644 FPUnion.I = 0;
645 FPUnion.I |= Sign << 31;
646 FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
647 FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
648 FPUnion.I |= (Exp & 0x3) << 23;
649 FPUnion.I |= Mantissa << 19;
650 return FPUnion.F;
651 }
652
653 /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
654 /// floating-point value. If the value cannot be represented as an 8-bit
655 /// floating-point value, then return -1.
656 inline int getFP16Imm(const APInt &Imm) {
657 uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
658 int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
659 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
660
661 // We can handle 4 bits of mantissa.
662 // mantissa = (16+UInt(e:f:g:h))/16.
663 if (Mantissa & 0x3f)
664 return -1;
665 Mantissa >>= 6;
666
667 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
668 if (Exp < -3 || Exp > 4)
669 return -1;
670 Exp = ((Exp+3) & 0x7) ^ 4;
671
672 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
673 }
674
675 inline int getFP16Imm(const APFloat &FPImm) {
676 return getFP16Imm(FPImm.bitcastToAPInt());
677 }
678
679 /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
680 /// floating-point value. If the value cannot be represented as an 8-bit
681 /// floating-point value, then return -1.
682 inline int getFP32Imm(const APInt &Imm) {
683 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
684 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
685 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
686
687 // We can handle 4 bits of mantissa.
688 // mantissa = (16+UInt(e:f:g:h))/16.
689 if (Mantissa & 0x7ffff)
690 return -1;
691 Mantissa >>= 19;
692 if ((Mantissa & 0xf) != Mantissa)
693 return -1;
694
695 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
696 if (Exp < -3 || Exp > 4)
697 return -1;
698 Exp = ((Exp+3) & 0x7) ^ 4;
699
700 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
701 }
702
703 inline int getFP32Imm(const APFloat &FPImm) {
704 return getFP32Imm(FPImm.bitcastToAPInt());
705 }
706
707 /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
708 /// floating-point value. If the value cannot be represented as an 8-bit
709 /// floating-point value, then return -1.
710 inline int getFP64Imm(const APInt &Imm) {
711 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
712 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
713 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
714
715 // We can handle 4 bits of mantissa.
716 // mantissa = (16+UInt(e:f:g:h))/16.
717 if (Mantissa & 0xffffffffffffULL)
718 return -1;
719 Mantissa >>= 48;
720 if ((Mantissa & 0xf) != Mantissa)
721 return -1;
722
723 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
724 if (Exp < -3 || Exp > 4)
725 return -1;
726 Exp = ((Exp+3) & 0x7) ^ 4;
727
728 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
729 }
730
731 inline int getFP64Imm(const APFloat &FPImm) {
732 return getFP64Imm(FPImm.bitcastToAPInt());
733 }
734
735} // end namespace ARM_AM
736} // end namespace llvm
737
738#endif
739