Bug Summary

File:build/source/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
Warning:line 227, column 55
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SIModeRegister.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AMDGPU -I /build/source/llvm/lib/Target/AMDGPU -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem 
/usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Target/AMDGPU/SIModeRegister.cpp

/build/source/llvm/lib/Target/AMDGPU/SIModeRegister.cpp

1//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass inserts changes to the Mode register settings as required.
10/// Note that currently it only deals with the Double Precision Floating Point
11/// rounding mode setting, but is intended to be generic enough to be easily
12/// expanded.
13///
14//===----------------------------------------------------------------------===//
15//
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/MachineFunctionPass.h"
21#include <queue>
22
23#define DEBUG_TYPE "si-mode-register"
24
25STATISTIC(NumSetregInserted, "Number of setreg of mode register inserted.");
26
27using namespace llvm;
28
/// A partial description of the Mode register: which bits have a known value
/// (Mask) and what those values are (Mode). Bits of Mode outside Mask are
/// always kept at zero.
struct Status {
  // Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
  // known value
  unsigned Mask = 0;
  unsigned Mode = 0;

  Status() = default;

  /// Build a status from a mask and a mode; mode bits that are not covered by
  /// the mask are discarded.
  Status(unsigned NewMask, unsigned NewMode)
      : Mask(NewMask), Mode(NewMode & NewMask) {}

  /// Merge two status values such that only values that don't conflict are
  /// preserved. Where both define a bit, the value from \p S wins.
  Status merge(const Status &S) const {
    return Status((Mask | S.Mask), ((Mode & ~S.Mask) | (S.Mode & S.Mask)));
  }

  /// Merge an unknown value by using the unknown value's mask to remove bits
  /// from the result.
  Status mergeUnknown(unsigned newMask) const {
    return Status(Mask & ~newMask, Mode & ~newMask);
  }

  /// Intersect two Status values to produce a mode and mask that is a subset
  /// of both values: only bits known in both with the same value survive.
  Status intersect(const Status &S) const {
    unsigned NewMask = (Mask & S.Mask) & (Mode ^ ~S.Mode);
    unsigned NewMode = (Mode & NewMask);
    return Status(NewMask, NewMode);
  }

  /// Produce the delta required to change the Mode to the required Mode: the
  /// bits of \p S that either differ from this status or are unknown here.
  Status delta(const Status &S) const {
    return Status((S.Mask & (Mode ^ S.Mode)) | (~Mask & S.Mask), S.Mode);
  }

  bool operator==(const Status &S) const {
    return (Mask == S.Mask) && (Mode == S.Mode);
  }

  bool operator!=(const Status &S) const { return !(*this == S); }

  /// True when every bit required by \p S is known here and has the value
  /// \p S requires.
  bool isCompatible(const Status &S) const {
    return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode);
  }

  /// True when \p S touches no bit known here, or is compatible with this
  /// status.
  bool isCombinable(const Status &S) const {
    return !(Mask & S.Mask) || isCompatible(S);
  }
};
78
79class BlockData {
80public:
81 // The Status that represents the mode register settings required by the
82 // FirstInsertionPoint (if any) in this block. Calculated in Phase 1.
83 Status Require;
84
85 // The Status that represents the net changes to the Mode register made by
86 // this block, Calculated in Phase 1.
87 Status Change;
88
89 // The Status that represents the mode register settings on exit from this
90 // block. Calculated in Phase 2.
91 Status Exit;
92
93 // The Status that represents the intersection of exit Mode register settings
94 // from all predecessor blocks. Calculated in Phase 2, and used by Phase 3.
95 Status Pred;
96
97 // In Phase 1 we record the first instruction that has a mode requirement,
98 // which is used in Phase 3 if we need to insert a mode change.
99 MachineInstr *FirstInsertionPoint = nullptr;
100
101 // A flag to indicate whether an Exit value has been set (we can't tell by
102 // examining the Exit value itself as all values may be valid results).
103 bool ExitSet = false;
104
105 BlockData() = default;
106};
107
108namespace {
109
110class SIModeRegister : public MachineFunctionPass {
111public:
112 static char ID;
113
114 std::vector<std::unique_ptr<BlockData>> BlockInfo;
115 std::queue<MachineBasicBlock *> Phase2List;
116
117 // The default mode register setting currently only caters for the floating
118 // point double precision rounding mode.
119 // We currently assume the default rounding mode is Round to Nearest
120 // NOTE: this should come from a per function rounding mode setting once such
121 // a setting exists.
122 unsigned DefaultMode = FP_ROUND_ROUND_TO_NEAREST0;
123 Status DefaultStatus =
124 Status(FP_ROUND_MODE_DP(0x3)(((0x3) & 0x3) << 2), FP_ROUND_MODE_DP(DefaultMode)(((DefaultMode) & 0x3) << 2));
125
126 bool Changed = false;
127
128public:
129 SIModeRegister() : MachineFunctionPass(ID) {}
130
131 bool runOnMachineFunction(MachineFunction &MF) override;
132
133 void getAnalysisUsage(AnalysisUsage &AU) const override {
134 AU.setPreservesCFG();
135 MachineFunctionPass::getAnalysisUsage(AU);
136 }
137
138 void processBlockPhase1(MachineBasicBlock &MBB, const SIInstrInfo *TII);
139
140 void processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII);
141
142 void processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII);
143
144 Status getInstructionMode(MachineInstr &MI, const SIInstrInfo *TII);
145
146 void insertSetreg(MachineBasicBlock &MBB, MachineInstr *I,
147 const SIInstrInfo *TII, Status InstrMode);
148};
149} // End anonymous namespace.
150
151INITIALIZE_PASS(SIModeRegister, DEBUG_TYPE,
152 "Insert required mode register values", false, false)
153
154char SIModeRegister::ID = 0;
155
156char &llvm::SIModeRegisterID = SIModeRegister::ID;
157
158FunctionPass *llvm::createSIModeRegisterPass() { return new SIModeRegister(); }
159
160// Determine the Mode register setting required for this instruction.
161// Instructions which don't use the Mode register return a null Status.
162// Note this currently only deals with instructions that use the floating point
163// double precision setting.
164Status SIModeRegister::getInstructionMode(MachineInstr &MI,
165 const SIInstrInfo *TII) {
166 if (TII->usesFPDPRounding(MI) ||
167 MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
168 MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
169 switch (MI.getOpcode()) {
170 case AMDGPU::V_INTERP_P1LL_F16:
171 case AMDGPU::V_INTERP_P1LV_F16:
172 case AMDGPU::V_INTERP_P2_F16:
173 // f16 interpolation instructions need double precision round to zero
174 return Status(FP_ROUND_MODE_DP(3)(((3) & 0x3) << 2),
175 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO)(((3) & 0x3) << 2));
176 case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
177 // Replacing the pseudo by a real instruction in place
178 if (TII->getSubtarget().hasTrue16BitInsts()) {
179 MachineBasicBlock &MBB = *MI.getParent();
180 MachineInstrBuilder B(*MBB.getParent(), MI);
181 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
182 MachineOperand Src0 = MI.getOperand(1);
183 MI.removeOperand(1);
184 B.addImm(0); // src0_modifiers
185 B.add(Src0); // re-add src0 operand
186 B.addImm(0); // clamp
187 B.addImm(0); // omod
188 } else
189 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
190 return Status(FP_ROUND_MODE_DP(3)(((3) & 0x3) << 2),
191 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_INF)(((1) & 0x3) << 2));
192 }
193 case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
194 // Replacing the pseudo by a real instruction in place
195 if (TII->getSubtarget().hasTrue16BitInsts()) {
196 MachineBasicBlock &MBB = *MI.getParent();
197 MachineInstrBuilder B(*MBB.getParent(), MI);
198 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
199 MachineOperand Src0 = MI.getOperand(1);
200 MI.removeOperand(1);
201 B.addImm(0); // src0_modifiers
202 B.add(Src0); // re-add src0 operand
203 B.addImm(0); // clamp
204 B.addImm(0); // omod
205 } else
206 MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
207 return Status(FP_ROUND_MODE_DP(3)(((3) & 0x3) << 2),
208 FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEGINF)(((2) & 0x3) << 2));
209 }
210 default:
211 return DefaultStatus;
212 }
213 }
214 return Status();
215}
216
217// Insert a setreg instruction to update the Mode register.
218// It is possible (though unlikely) for an instruction to require a change to
219// the value of disjoint parts of the Mode register when we don't know the
220// value of the intervening bits. In that case we need to use more than one
221// setreg instruction.
222void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
223 const SIInstrInfo *TII, Status InstrMode) {
224 while (InstrMode.Mask) {
13
Loop condition is true. Entering loop body
225 unsigned Offset = llvm::countr_zero<unsigned>(InstrMode.Mask);
226 unsigned Width = llvm::countr_one<unsigned>(InstrMode.Mask >> Offset);
14
Calling 'countr_one<unsigned int>'
24
Returning from 'countr_one<unsigned int>'
25
'Width' initialized to 32
227 unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
26
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
228 BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
229 .addImm(Value)
230 .addImm(((Width - 1) << AMDGPU::Hwreg::WIDTH_M1_SHIFT_) |
231 (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) |
232 (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_));
233 ++NumSetregInserted;
234 Changed = true;
235 InstrMode.Mask &= ~(((1 << Width) - 1) << Offset);
236 }
237}
238
239// In Phase 1 we iterate through the instructions of the block and for each
240// instruction we get its mode usage. If the instruction uses the Mode register
241// we:
242// - update the Change status, which tracks the changes to the Mode register
243// made by this block
244// - if this instruction's requirements are compatible with the current setting
245// of the Mode register we merge the modes
246// - if it isn't compatible and an InsertionPoint isn't set, then we set the
247// InsertionPoint to the current instruction, and we remember the current
248// mode
249// - if it isn't compatible and InsertionPoint is set we insert a seteg before
250// that instruction (unless this instruction forms part of the block's
251// entry requirements in which case the insertion is deferred until Phase 3
252// when predecessor exit values are known), and move the insertion point to
253// this instruction
254// - if this is a setreg instruction we treat it as an incompatible instruction.
255// This is sub-optimal but avoids some nasty corner cases, and is expected to
256// occur very rarely.
257// - on exit we have set the Require, Change, and initial Exit modes.
258void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
259 const SIInstrInfo *TII) {
260 auto NewInfo = std::make_unique<BlockData>();
261 MachineInstr *InsertionPoint = nullptr;
262 // RequirePending is used to indicate whether we are collecting the initial
263 // requirements for the block, and need to defer the first InsertionPoint to
264 // Phase 3. It is set to false once we have set FirstInsertionPoint, or when
265 // we discover an explicit setreg that means this block doesn't have any
266 // initial requirements.
267 bool RequirePending = true;
268 Status IPChange;
269 for (MachineInstr &MI : MBB) {
270 Status InstrMode = getInstructionMode(MI, TII);
271 if (MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
272 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
273 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
274 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
275 // We preserve any explicit mode register setreg instruction we encounter,
276 // as we assume it has been inserted by a higher authority (this is
277 // likely to be a very rare occurrence).
278 unsigned Dst = TII->getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
279 if (((Dst & AMDGPU::Hwreg::ID_MASK_) >> AMDGPU::Hwreg::ID_SHIFT_) !=
280 AMDGPU::Hwreg::ID_MODE)
281 continue;
282
283 unsigned Width = ((Dst & AMDGPU::Hwreg::WIDTH_M1_MASK_) >>
284 AMDGPU::Hwreg::WIDTH_M1_SHIFT_) +
285 1;
286 unsigned Offset =
287 (Dst & AMDGPU::Hwreg::OFFSET_MASK_) >> AMDGPU::Hwreg::OFFSET_SHIFT_;
288 unsigned Mask = ((1 << Width) - 1) << Offset;
289
290 // If an InsertionPoint is set we will insert a setreg there.
291 if (InsertionPoint) {
292 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
293 InsertionPoint = nullptr;
294 }
295 // If this is an immediate then we know the value being set, but if it is
296 // not an immediate then we treat the modified bits of the mode register
297 // as unknown.
298 if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
299 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
300 unsigned Val = TII->getNamedOperand(MI, AMDGPU::OpName::imm)->getImm();
301 unsigned Mode = (Val << Offset) & Mask;
302 Status Setreg = Status(Mask, Mode);
303 // If we haven't already set the initial requirements for the block we
304 // don't need to as the requirements start from this explicit setreg.
305 RequirePending = false;
306 NewInfo->Change = NewInfo->Change.merge(Setreg);
307 } else {
308 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
309 }
310 } else if (!NewInfo->Change.isCompatible(InstrMode)) {
311 // This instruction uses the Mode register and its requirements aren't
312 // compatible with the current mode.
313 if (InsertionPoint) {
314 // If the required mode change cannot be included in the current
315 // InsertionPoint changes, we need a setreg and start a new
316 // InsertionPoint.
317 if (!IPChange.delta(NewInfo->Change).isCombinable(InstrMode)) {
318 if (RequirePending) {
319 // This is the first insertionPoint in the block so we will defer
320 // the insertion of the setreg to Phase 3 where we know whether or
321 // not it is actually needed.
322 NewInfo->FirstInsertionPoint = InsertionPoint;
323 NewInfo->Require = NewInfo->Change;
324 RequirePending = false;
325 } else {
326 insertSetreg(MBB, InsertionPoint, TII,
327 IPChange.delta(NewInfo->Change));
328 IPChange = NewInfo->Change;
329 }
330 // Set the new InsertionPoint
331 InsertionPoint = &MI;
332 }
333 NewInfo->Change = NewInfo->Change.merge(InstrMode);
334 } else {
335 // No InsertionPoint is currently set - this is either the first in
336 // the block or we have previously seen an explicit setreg.
337 InsertionPoint = &MI;
338 IPChange = NewInfo->Change;
339 NewInfo->Change = NewInfo->Change.merge(InstrMode);
340 }
341 }
342 }
343 if (RequirePending) {
344 // If we haven't yet set the initial requirements for the block we set them
345 // now.
346 NewInfo->FirstInsertionPoint = InsertionPoint;
347 NewInfo->Require = NewInfo->Change;
348 } else if (InsertionPoint) {
349 // We need to insert a setreg at the InsertionPoint
350 insertSetreg(MBB, InsertionPoint, TII, IPChange.delta(NewInfo->Change));
351 }
352 NewInfo->Exit = NewInfo->Change;
353 BlockInfo[MBB.getNumber()] = std::move(NewInfo);
354}
355
356// In Phase 2 we revisit each block and calculate the common Mode register
357// value provided by all predecessor blocks. If the Exit value for the block
358// is changed, then we add the successor blocks to the worklist so that the
359// exit value is propagated.
360void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
361 const SIInstrInfo *TII) {
362 bool RevisitRequired = false;
363 bool ExitSet = false;
364 unsigned ThisBlock = MBB.getNumber();
365 if (MBB.pred_empty()) {
366 // There are no predecessors, so use the default starting status.
367 BlockInfo[ThisBlock]->Pred = DefaultStatus;
368 ExitSet = true;
369 } else {
370 // Build a status that is common to all the predecessors by intersecting
371 // all the predecessor exit status values.
372 // Mask bits (which represent the Mode bits with a known value) can only be
373 // added by explicit SETREG instructions or the initial default value -
374 // the intersection process may remove Mask bits.
375 // If we find a predecessor that has not yet had an exit value determined
376 // (this can happen for example if a block is its own predecessor) we defer
377 // use of that value as the Mask will be all zero, and we will revisit this
378 // block again later (unless the only predecessor without an exit value is
379 // this block).
380 MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end();
381 MachineBasicBlock &PB = *(*P);
382 unsigned PredBlock = PB.getNumber();
383 if ((ThisBlock == PredBlock) && (std::next(P) == E)) {
384 BlockInfo[ThisBlock]->Pred = DefaultStatus;
385 ExitSet = true;
386 } else if (BlockInfo[PredBlock]->ExitSet) {
387 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
388 ExitSet = true;
389 } else if (PredBlock != ThisBlock)
390 RevisitRequired = true;
391
392 for (P = std::next(P); P != E; P = std::next(P)) {
393 MachineBasicBlock *Pred = *P;
394 unsigned PredBlock = Pred->getNumber();
395 if (BlockInfo[PredBlock]->ExitSet) {
396 if (BlockInfo[ThisBlock]->ExitSet) {
397 BlockInfo[ThisBlock]->Pred =
398 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
399 } else {
400 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
401 }
402 ExitSet = true;
403 } else if (PredBlock != ThisBlock)
404 RevisitRequired = true;
405 }
406 }
407 Status TmpStatus =
408 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
409 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
410 BlockInfo[ThisBlock]->Exit = TmpStatus;
411 // Add the successors to the work list so we can propagate the changed exit
412 // status.
413 for (MachineBasicBlock *Succ : MBB.successors())
414 Phase2List.push(Succ);
415 }
416 BlockInfo[ThisBlock]->ExitSet = ExitSet;
417 if (RevisitRequired)
418 Phase2List.push(&MBB);
419}
420
421// In Phase 3 we revisit each block and if it has an insertion point defined we
422// check whether the predecessor mode meets the block's entry requirements. If
423// not we insert an appropriate setreg instruction to modify the Mode register.
424void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB,
425 const SIInstrInfo *TII) {
426 unsigned ThisBlock = MBB.getNumber();
427 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
4
Taking true branch
428 Status Delta =
429 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
5
Calling 'Status::delta'
9
Returning from 'Status::delta'
430 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
10
Assuming field 'FirstInsertionPoint' is null
11
Taking false branch
431 insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta);
432 else
433 insertSetreg(MBB, &MBB.instr_front(), TII, Delta);
12
Calling 'SIModeRegister::insertSetreg'
434 }
435}
436
437bool SIModeRegister::runOnMachineFunction(MachineFunction &MF) {
438 BlockInfo.resize(MF.getNumBlockIDs());
439 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
440 const SIInstrInfo *TII = ST.getInstrInfo();
441
442 // Processing is performed in a number of phases
443
444 // Phase 1 - determine the initial mode required by each block, and add setreg
445 // instructions for intra block requirements.
446 for (MachineBasicBlock &BB : MF)
447 processBlockPhase1(BB, TII);
448
449 // Phase 2 - determine the exit mode from each block. We add all blocks to the
450 // list here, but will also add any that need to be revisited during Phase 2
451 // processing.
452 for (MachineBasicBlock &BB : MF)
453 Phase2List.push(&BB);
454 while (!Phase2List.empty()) {
1
Assuming the condition is false
2
Loop condition is false. Execution continues on line 461
455 processBlockPhase2(*Phase2List.front(), TII);
456 Phase2List.pop();
457 }
458
459 // Phase 3 - add an initial setreg to each block where the required entry mode
460 // is not satisfied by the exit mode of all its predecessors.
461 for (MachineBasicBlock &BB : MF)
462 processBlockPhase3(BB, TII);
3
Calling 'SIModeRegister::processBlockPhase3'
463
464 BlockInfo.clear();
465
466 return Changed;
467}

/build/source/llvm/include/llvm/ADT/bit.h

1//===-- llvm/ADT/bit.h - C++20 <bit> ----------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the C++20 <bit> header.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ADT_BIT_H
15#define LLVM_ADT_BIT_H
16
17#include "llvm/Support/Compiler.h"
18#include <cstdint>
19#include <limits>
20#include <type_traits>
21
22#if !__has_builtin(__builtin_bit_cast)1
23#include <cstring>
24#endif
25
26#if defined(_MSC_VER) && !defined(_DEBUG1)
27#include <cstdlib> // for _byteswap_{ushort,ulong,uint64}
28#endif
29
30#ifdef _MSC_VER
31// Declare these intrinsics manually rather including intrin.h. It's very
32// expensive, and bit.h is popular via MathExtras.h.
33// #include <intrin.h>
34extern "C" {
35unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
36unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
37unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
38unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
39}
40#endif
41
42namespace llvm {
43
// Differences from the C++20 bit_cast:
// - not constexpr, because that needs compiler support;
// - To must be trivially constructible, which keeps the fallback
//   implementation free of UB.
template <
    typename To, typename From,
    typename = std::enable_if_t<sizeof(To) == sizeof(From)>,
    typename = std::enable_if_t<std::is_trivially_constructible<To>::value>,
    typename = std::enable_if_t<std::is_trivially_copyable<To>::value>,
    typename = std::enable_if_t<std::is_trivially_copyable<From>::value>>
[[nodiscard]] inline To bit_cast(const From &from) noexcept {
#if __has_builtin(__builtin_bit_cast)
  return __builtin_bit_cast(To, from);
#else
  // Copy the object representation byte for byte.
  To Result;
  std::memcpy(&Result, &from, sizeof(To));
  return Result;
#endif
}
62
/// Returns V with the order of its bytes reversed. Supports 1, 2, 4 and 8
/// byte integer types; any other size is rejected at compile time.
template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
[[nodiscard]] constexpr T byteswap(T V) noexcept {
  if constexpr (sizeof(T) == 1) {
    return V;
  } else if constexpr (sizeof(T) == 2) {
    uint16_t Bits = V;
#if defined(_MSC_VER) && !defined(_DEBUG)
    // The DLL version of the runtime lacks these functions (bug!?), but in a
    // release build they're replaced with BSWAP instructions anyway.
    return _byteswap_ushort(Bits);
#else
    uint16_t Upper = Bits << 8;
    uint16_t Lower = Bits >> 8;
    return Upper | Lower;
#endif
  } else if constexpr (sizeof(T) == 4) {
    uint32_t Bits = V;
#if __has_builtin(__builtin_bswap32)
    return __builtin_bswap32(Bits);
#elif defined(_MSC_VER) && !defined(_DEBUG)
    return _byteswap_ulong(Bits);
#else
    uint32_t B0 = Bits & 0x000000FF;
    uint32_t B1 = Bits & 0x0000FF00;
    uint32_t B2 = Bits & 0x00FF0000;
    uint32_t B3 = Bits & 0xFF000000;
    return (B0 << 24) | (B1 << 8) | (B2 >> 8) | (B3 >> 24);
#endif
  } else if constexpr (sizeof(T) == 8) {
    uint64_t Bits = V;
#if __has_builtin(__builtin_bswap64)
    return __builtin_bswap64(Bits);
#elif defined(_MSC_VER) && !defined(_DEBUG)
    return _byteswap_uint64(Bits);
#else
    // Swap each 32-bit half, then exchange the halves.
    uint64_t Upper = llvm::byteswap<uint32_t>(Bits);
    uint32_t Lower = llvm::byteswap<uint32_t>(Bits >> 32);
    return (Upper << 32) | Lower;
#endif
  } else {
    static_assert(!sizeof(T *), "Don't know how to handle the given type.");
    return 0;
  }
}
108
/// True when exactly one bit of Value is set. Zero yields false.
template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
[[nodiscard]] constexpr inline bool has_single_bit(T Value) noexcept {
  if (Value == 0)
    return false;
  // Clearing the lowest set bit leaves nothing only when it was the sole bit.
  return (Value & (Value - 1)) == 0;
}
113
namespace detail {
/// Generic trailing-zero counter. Returns the number of digits of T for a
/// zero input.
template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
  static unsigned count(T Val) {
    if (!Val)
      return std::numeric_limits<T>::digits;
    if (Val & 0x1)
      return 0;

    // Bisection: repeatedly test whether the low half of the remaining
    // window is all zero and, if so, skip over it.
    unsigned Zeros = 0;
    T Step = std::numeric_limits<T>::digits >> 1;
    T LowMask = std::numeric_limits<T>::max() >> Step;
    while (Step) {
      if ((Val & LowMask) == 0) {
        Val >>= Step;
        Zeros |= Step;
      }
      Step >>= 1;
      LowMask >>= Step;
    }
    return Zeros;
  }
};

#if defined(__GNUC__) || defined(_MSC_VER)
/// 4-byte specialization that uses the compiler's bit-scan support.
template <typename T> struct TrailingZerosCounter<T, 4> {
  static unsigned count(T Val) {
    if (Val == 0)
      return 32;

#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
    return __builtin_ctz(Val);
#elif defined(_MSC_VER)
    unsigned long Pos;
    _BitScanForward(&Pos, Val);
    return Pos;
#endif
  }
};

#if !defined(_MSC_VER) || defined(_M_X64)
/// 8-byte specialization that uses the compiler's bit-scan support.
template <typename T> struct TrailingZerosCounter<T, 8> {
  static unsigned count(T Val) {
    if (Val == 0)
      return 64;

#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
    return __builtin_ctzll(Val);
#elif defined(_MSC_VER)
    unsigned long Pos;
    _BitScanForward64(&Pos, Val);
    return Pos;
#endif
  }
};
#endif
#endif
} // namespace detail
172
173/// Count number of 0's from the least significant bit to the most
174/// stopping at the first 1.
175///
176/// Only unsigned integral types are allowed.
177///
178/// Returns std::numeric_limits<T>::digits on an input of 0.
179template <typename T> [[nodiscard]] int countr_zero(T Val) {
180 static_assert(std::is_unsigned_v<T>,
181 "Only unsigned integral types are allowed.");
182 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
16
Calling 'TrailingZerosCounter::count'
20
Returning from 'TrailingZerosCounter::count'
21
Returning the value 32
183}
184
namespace detail {
/// Generic leading-zero counter. Returns the number of digits of T for a
/// zero input.
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
  static unsigned count(T Val) {
    if (!Val)
      return std::numeric_limits<T>::digits;

    // Bisection: if the upper part of the remaining window is empty, count
    // its width as zeros; otherwise continue inside the upper part.
    unsigned Zeros = 0;
    for (T Step = std::numeric_limits<T>::digits >> 1; Step; Step >>= 1) {
      T Upper = Val >> Step;
      if (Upper)
        Val = Upper;
      else
        Zeros |= Step;
    }
    return Zeros;
  }
};

#if defined(__GNUC__) || defined(_MSC_VER)
/// 4-byte specialization that uses the compiler's bit-scan support.
template <typename T> struct LeadingZerosCounter<T, 4> {
  static unsigned count(T Val) {
    if (Val == 0)
      return 32;

#if __has_builtin(__builtin_clz) || defined(__GNUC__)
    return __builtin_clz(Val);
#elif defined(_MSC_VER)
    unsigned long Pos;
    _BitScanReverse(&Pos, Val);
    return Pos ^ 31;
#endif
  }
};

#if !defined(_MSC_VER) || defined(_M_X64)
/// 8-byte specialization that uses the compiler's bit-scan support.
template <typename T> struct LeadingZerosCounter<T, 8> {
  static unsigned count(T Val) {
    if (Val == 0)
      return 64;

#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
    return __builtin_clzll(Val);
#elif defined(_MSC_VER)
    unsigned long Pos;
    _BitScanReverse64(&Pos, Val);
    return Pos ^ 63;
#endif
  }
};
#endif
#endif
} // namespace detail
238
239/// Count number of 0's from the most significant bit to the least
240/// stopping at the first 1.
241///
242/// Only unsigned integral types are allowed.
243///
244/// Returns std::numeric_limits<T>::digits on an input of 0.
245template <typename T> [[nodiscard]] int countl_zero(T Val) {
246 static_assert(std::is_unsigned_v<T>,
247 "Only unsigned integral types are allowed.");
248 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
249}
250
251/// Count the number of ones from the most significant bit to the first
252/// zero bit.
253///
254/// Ex. countl_one(0xFF0FFF00) == 8.
255/// Only unsigned integral types are allowed.
256///
257/// Returns std::numeric_limits<T>::digits on an input of all ones.
258template <typename T> [[nodiscard]] int countl_one(T Value) {
259 static_assert(std::is_unsigned_v<T>,
260 "Only unsigned integral types are allowed.");
261 return llvm::countl_zero<T>(~Value);
262}
263
264/// Count the number of ones from the least significant bit to the first
265/// zero bit.
266///
267/// Ex. countr_one(0x00FF00FF) == 8.
268/// Only unsigned integral types are allowed.
269///
270/// Returns std::numeric_limits<T>::digits on an input of all ones.
271template <typename T> [[nodiscard]] int countr_one(T Value) {
272 static_assert(std::is_unsigned_v<T>,
273 "Only unsigned integral types are allowed.");
274 return llvm::countr_zero<T>(~Value);
15
Calling 'countr_zero<unsigned int>'
22
Returning from 'countr_zero<unsigned int>'
23
Returning the value 32
275}
276
277/// Returns the number of bits needed to represent Value if Value is nonzero.
278/// Returns 0 otherwise.
279///
280/// Ex. bit_width(5) == 3.
281template <typename T> [[nodiscard]] int bit_width(T Value) {
282 static_assert(std::is_unsigned_v<T>,
283 "Only unsigned integral types are allowed.");
284 return std::numeric_limits<T>::digits - llvm::countl_zero(Value);
285}
286
287/// Returns the largest integral power of two no greater than Value if Value is
288/// nonzero. Returns 0 otherwise.
289///
290/// Ex. bit_floor(5) == 4.
291template <typename T> [[nodiscard]] T bit_floor(T Value) {
292 static_assert(std::is_unsigned_v<T>,
293 "Only unsigned integral types are allowed.");
294 if (!Value)
295 return 0;
296 return T(1) << (llvm::bit_width(Value) - 1);
297}
298
299/// Returns the smallest integral power of two no smaller than Value if Value is
300/// nonzero. Returns 1 otherwise.
301///
302/// Ex. bit_ceil(5) == 8.
303///
304/// The return value is undefined if the input is larger than the largest power
305/// of two representable in T.
306template <typename T> [[nodiscard]] T bit_ceil(T Value) {
307 static_assert(std::is_unsigned_v<T>,
308 "Only unsigned integral types are allowed.");
309 if (Value < 2)
310 return 1;
311 return T(1) << llvm::bit_width<T>(Value - 1u);
312}
313
namespace detail {
/// Counts set bits for types of at most 4 bytes (enforced by the
/// static_assert); the 8-byte case has its own specialization below.
template <typename T, std::size_t SizeOfT> struct PopulationCounter {
  /// Returns the number of set bits in \p Value.
  static int count(T Value) {
    // Generic version, forward to 32 bits.
    static_assert(SizeOfT <= 4, "Not implemented!");
#if defined(__GNUC__)
    return static_cast<int>(__builtin_popcount(Value));
#else
    // Portable fallback using only shifts, masks and one multiply.
    uint32_t v = Value;
    v = v - ((v >> 1) & 0x55555555);
    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    // '+' binds tighter than '&'; the parentheses only make that explicit.
    return static_cast<int>((((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24);
#endif
  }
};

/// 8-byte specialization of PopulationCounter.
template <typename T> struct PopulationCounter<T, 8> {
  /// Returns the number of set bits in \p Value.
  static int count(T Value) {
#if defined(__GNUC__)
    return static_cast<int>(__builtin_popcountll(Value));
#else
    // Same scheme as the 32-bit fallback, widened to 64-bit masks.
    uint64_t v = Value;
    v = v - ((v >> 1) & 0x5555555555555555ULL);
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
    return static_cast<int>(
        static_cast<uint64_t>(v * 0x0101010101010101ULL) >> 56);
#endif
  }
};
} // namespace detail
344
345/// Count the number of set bits in a value.
346/// Ex. popcount(0xF000F000) = 8
347/// Returns 0 if the word is zero.
348template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
349[[nodiscard]] inline int popcount(T Value) noexcept {
350 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
351}
352
// Forward-declare rotr so that rotl can use it. The default template argument
// is given here, so the later definition of rotr does not repeat it.
template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
[[nodiscard]] constexpr T rotr(T V, int R);
356
357template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
358[[nodiscard]] constexpr T rotl(T V, int R) {
359 unsigned N = std::numeric_limits<T>::digits;
360
361 R = R % N;
362 if (!R)
363 return V;
364
365 if (R < 0)
366 return llvm::rotr(V, -R);
367
368 return (V << R) | (V >> (N - R));
369}
370
371template <typename T, typename> [[nodiscard]] constexpr T rotr(T V, int R) {
372 unsigned N = std::numeric_limits<T>::digits;
373
374 R = R % N;
375 if (!R)
376 return V;
377
378 if (R < 0)
379 return llvm::rotl(V, -R);
380
381 return (V >> R) | (V << (N - R));
382}
383
384} // namespace llvm
385
386#endif