Bug Summary

File: build/source/openmp/libomptarget/src/omptarget.cpp
Warning: line 883, column 9
Potential leak of memory pointed to by 'PostProcessingPtrs'
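
The path below shows the leak: 'PostProcessingPtrs' is heap-allocated with 'new' at line 862 of the listing and is only freed inside the post-processing callback registered at line 1008 (postProcessingTargetDataEnd deletes it at line 852), so the early 'return OFFLOAD_FAIL' at line 885 abandons the allocation. What follows is a minimal, self-contained C++ sketch of that pattern and of one possible remedy using a smart pointer; the names (PostInfo, processLeaky, processSafe) are illustrative only and are neither libomptarget code nor the upstream fix.

  #include <functional>
  #include <memory>
  #include <vector>

  struct PostInfo { int Value; };

  // Leaky shape: the buffer is heap-allocated up front, but the early-return
  // path skips the callback that would eventually free it.
  int processLeaky(bool FailEarly, std::vector<std::function<int()>> &Queue) {
    auto *Buf = new std::vector<PostInfo>(); // allocated unconditionally
    if (FailEarly)
      return -1;                             // Buf leaks on this path
    Queue.push_back([Buf]() -> int {
      delete Buf;                            // freed only if we got this far
      return 0;
    });
    return 0;
  }

  // One possible remedy: hand ownership to a smart pointer so every exit path
  // releases the buffer; shared_ptr keeps the callback copyable, which
  // std::function requires.
  int processSafe(bool FailEarly, std::vector<std::function<int()>> &Queue) {
    auto Buf = std::make_shared<std::vector<PostInfo>>();
    if (FailEarly)
      return -1;                             // Buf is released automatically
    Queue.push_back([Buf]() -> int {
      Buf->clear();                          // last owner frees the buffer
      return 0;
    });
    return 0;
  }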

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name omptarget.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D OMPT_SUPPORT=1 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I projects/openmp/libomptarget/src -I /build/source/openmp/libomptarget/src -I include -I /build/source/llvm/include -I projects/openmp/runtime/src -I /build/source/openmp/libomptarget/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/openmp/libomptarget/src/omptarget.cpp
1//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the interface to be used by Clang during the codegen of a
10// target region.
11//
12//===----------------------------------------------------------------------===//
13
14#include "omptarget.h"
15#include "device.h"
16#include "private.h"
17#include "rtl.h"
18
19#include "llvm/ADT/bit.h"
20
21#include <cassert>
22#include <cstdint>
23#include <vector>
24
25using llvm::SmallVector;
26
27int AsyncInfoTy::synchronize() {
28 int Result = OFFLOAD_SUCCESS(0);
29 if (!isQueueEmpty()) {
30 switch (SyncType) {
31 case SyncTy::BLOCKING:
32 // If we have a queue we need to synchronize it now.
33 Result = Device.synchronize(*this);
34 assert(AsyncInfo.Queue == nullptr &&
35 "The device plugin should have nulled the queue to indicate there "
36 "are no outstanding actions!");
37 break;
38 case SyncTy::NON_BLOCKING:
39 Result = Device.queryAsync(*this);
40 break;
41 }
42 }
43
44 // Run any pending post-processing function registered on this async object.
45 if (Result == OFFLOAD_SUCCESS(0) && isQueueEmpty())
46 Result = runPostProcessing();
47
48 return Result;
49}
50
51void *&AsyncInfoTy::getVoidPtrLocation() {
52 BufferLocations.push_back(nullptr);
53 return BufferLocations.back();
54}
55
56bool AsyncInfoTy::isDone() const { return isQueueEmpty(); }
57
58int32_t AsyncInfoTy::runPostProcessing() {
59 size_t Size = PostProcessingFunctions.size();
60 for (size_t I = 0; I < Size; ++I) {
61 const int Result = PostProcessingFunctions[I]();
62 if (Result != OFFLOAD_SUCCESS(0))
63 return Result;
64 }
65
66 // Clear the vector up until the last known function, since post-processing
67 // procedures might add new procedures themselves.
68 const auto PrevBegin = PostProcessingFunctions.begin();
69 PostProcessingFunctions.erase(PrevBegin, PrevBegin + Size);
70
71 return OFFLOAD_SUCCESS(0);
72}
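
The comment at lines 66-67 describes a re-entrancy detail worth spelling out: a post-processing procedure may register further procedures while it runs, so only the prefix that was actually executed gets erased. Below is a minimal, standalone C++ model of that behavior; the names (Callbacks, runRegisteredCallbacks) are illustrative and not part of libomptarget.

  #include <cstdio>
  #include <functional>
  #include <vector>

  static std::vector<std::function<int()>> Callbacks;

  static int runRegisteredCallbacks() {
    const size_t Size = Callbacks.size();
    for (size_t I = 0; I < Size; ++I)
      if (int Rc = Callbacks[I]())
        return Rc; // stop on the first failure, keep the rest registered
    // Erase only what was run; anything appended during the loop survives.
    Callbacks.erase(Callbacks.begin(), Callbacks.begin() + Size);
    return 0;
  }

  int main() {
    Callbacks.push_back([] {
      // A callback can schedule follow-up work for a later run.
      Callbacks.push_back([] { std::puts("second pass"); return 0; });
      std::puts("first pass");
      return 0;
    });
    runRegisteredCallbacks(); // prints "first pass", leaves one callback queued
    runRegisteredCallbacks(); // prints "second pass"
    return 0;
  }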
73
74bool AsyncInfoTy::isQueueEmpty() const { return AsyncInfo.Queue == nullptr; }
75
76/* All begin addresses for partially mapped structs must be aligned, up to 16,
77 * in order to ensure proper alignment of members. E.g.
78 *
79 * struct S {
80 * int a; // 4-aligned
81 * int b; // 4-aligned
82 * int *p; // 8-aligned
83 * } s1;
84 * ...
85 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
86 * {
87 * s1.b = 5;
88 * for (int i...) s1.p[i] = ...;
89 * }
90 *
91 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
92 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
93 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
94 * requirements for its type. Now, when we allocate memory on the device, in
95 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
96 * This means that the chunk of the struct on the device will start at a
97 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
98 * address of p will be a misaligned 0x204 (on the host there was no need to add
99 * padding between b and p, so p comes exactly 4 bytes after b). If the device
100 * kernel tries to access s1.p, a misaligned address error occurs (as reported
101 * by the CUDA plugin). By padding the begin address down to a multiple of 8 and
102 * extending the size of the allocated chunk accordingly, the chunk on the
103 * device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
104 * &s1.p=0x208, as they should be to satisfy the alignment requirements.
105 */
106static const int64_t MaxAlignment = 16;
107
108/// Return the alignment requirement of partially mapped structs, see
109/// MaxAlignment above.
110static uint64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
111 int LowestOneBit = __builtin_ffsl(reinterpret_cast<uintptr_t>(HstPtrBase));
112 uint64_t BaseAlignment = 1 << (LowestOneBit - 1);
113 return MaxAlignment < BaseAlignment ? MaxAlignment : BaseAlignment;
114}
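
To make the comment block above concrete, here is a small standalone C++ check that reproduces its example numbers (base &s1 at 0x100, partial map beginning at &s1.b = 0x104): the computed alignment is 16 and the padding is 4 bytes, so the mapped region starts back at 0x100. The helper name partialStructAlignment is illustrative; it mirrors the logic of getPartialStructRequiredAlignment above but is not libomptarget code.

  #include <cstdint>
  #include <cstdio>

  static uint64_t partialStructAlignment(uintptr_t HstPtrBase) {
    const int64_t MaxAlignment = 16;
    int LowestOneBit = __builtin_ffsl(static_cast<long>(HstPtrBase));
    uint64_t BaseAlignment = 1ULL << (LowestOneBit - 1);
    return MaxAlignment < (int64_t)BaseAlignment ? MaxAlignment : BaseAlignment;
  }

  int main() {
    uintptr_t Base = 0x100;  // &s1 (== &s1.a) in the example above
    uintptr_t Begin = 0x104; // &s1.b, where the partial map starts
    uint64_t Align = partialStructAlignment(Base);
    uint64_t Padding = Begin % Align; // 4 bytes of padding
    std::printf("alignment=%llu padding=%llu mapped-begin=0x%llx\n",
                (unsigned long long)Align, (unsigned long long)Padding,
                (unsigned long long)(Begin - Padding));
    return 0; // prints: alignment=16 padding=4 mapped-begin=0x100
  }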
115
116/// Map global data and execute pending ctors
117static int initLibrary(DeviceTy &Device) {
118 /*
119 * Map global data
120 */
121 int32_t DeviceId = Device.DeviceID;
122 int Rc = OFFLOAD_SUCCESS(0);
123 bool SupportsEmptyImages = Device.RTL->supports_empty_images &&
12
Assuming field 'supports_empty_images' is null
124 Device.RTL->supports_empty_images() > 0;
125 {
126 std::lock_guard<decltype(PM->TrlTblMtx)> LG(PM->TrlTblMtx);
127 for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) {
13
Assuming '__begin1' is equal to '__end1'
128 TranslationTable *TransTable =
129 &PM->HostEntriesBeginToTransTable[HostEntriesBegin];
130 if (TransTable->HostTable.EntriesBegin ==
131 TransTable->HostTable.EntriesEnd &&
132 !SupportsEmptyImages) {
133 // No host entry so no need to proceed
134 continue;
135 }
136
137 if (TransTable->TargetsTable[DeviceId] != 0) {
138 // Library entries have already been processed
139 continue;
140 }
141
142 // 1) get image.
143 assert(TransTable->TargetsImages.size() > (size_t)DeviceId &&
144 "Not expecting a device ID outside the table's bounds!");
145 __tgt_device_image *Img = TransTable->TargetsImages[DeviceId];
146 if (!Img) {
147 REPORT("No image loaded for device id %d.\n", DeviceId);
148 Rc = OFFLOAD_FAIL(~0);
149 break;
150 }
151 // 2) load image into the target table.
152 __tgt_target_table *TargetTable = TransTable->TargetsTable[DeviceId] =
153 Device.loadBinary(Img);
154 // Unable to get table for this image: invalidate image and fail.
155 if (!TargetTable) {
156 REPORT("Unable to generate entries table for device id %d.\n",
157 DeviceId);
158 TransTable->TargetsImages[DeviceId] = 0;
159 Rc = OFFLOAD_FAIL(~0);
160 break;
161 }
162
163 // Verify whether the two table sizes match.
164 size_t Hsize =
165 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
166 size_t Tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
167
168 // Invalid image for these host entries!
169 if (Hsize != Tsize) {
170 REPORT(
171 "Host and Target tables mismatch for device id %d [%zx != %zx].\n",
172 DeviceId, Hsize, Tsize);
173 TransTable->TargetsImages[DeviceId] = 0;
174 TransTable->TargetsTable[DeviceId] = 0;
175 Rc = OFFLOAD_FAIL(~0);
176 break;
177 }
178
179 DeviceTy::HDTTMapAccessorTy HDTTMap =
180 Device.HostDataToTargetMap.getExclusiveAccessor();
181
182 __tgt_target_table *HostTable = &TransTable->HostTable;
183 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
184 *CurrHostEntry = HostTable->EntriesBegin,
185 *EntryDeviceEnd = TargetTable->EntriesEnd;
186 CurrDeviceEntry != EntryDeviceEnd;
187 CurrDeviceEntry++, CurrHostEntry++) {
188 if (CurrDeviceEntry->size != 0) {
189 // has data.
190 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
191 "data size mismatch");
192
193 // Fortran may use multiple weak declarations for the same symbol,
194 // therefore we must allow for multiple weak symbols to be loaded from
195 // the fat binary. Treat these mappings as any other "regular"
196 // mapping. Add entry to map.
197 if (Device.getTgtPtrBegin(HDTTMap, CurrHostEntry->addr,
198 CurrHostEntry->size))
199 continue;
200
201 DP("Add mapping from host " DPxMOD " to device " DPxMOD{}
202 " with size %zu"{}
203 "\n",{}
204 DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),{}
205 CurrDeviceEntry->size){};
206 HDTTMap->emplace(new HostDataToTargetTy(
207 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
208 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
209 (uintptr_t)CurrHostEntry->addr +
210 CurrHostEntry->size /*HstPtrEnd*/,
211 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
212 false /*UseHoldRefCount*/, CurrHostEntry->name,
213 true /*IsRefCountINF*/));
214
215 // Notify about the new mapping.
216 if (Device.notifyDataMapped(CurrHostEntry->addr, CurrHostEntry->size))
217 return OFFLOAD_FAIL(~0);
218 }
219 }
220 }
221 }
222
223 if (Rc
13.1
'Rc' is equal to OFFLOAD_SUCCESS
!= OFFLOAD_SUCCESS(0)) {
14
Taking false branch
224 return Rc;
225 }
226
227 /*
228 * Run ctors for static objects
229 */
230 if (!Device.PendingCtorsDtors.empty()) {
15
Assuming the condition is true
16
Taking true branch
231 AsyncInfoTy AsyncInfo(Device);
232 // Call all ctors for all libraries registered so far
233 for (auto &Lib : Device.PendingCtorsDtors) {
234 if (!Lib.second.PendingCtors.empty()) {
17
Assuming the condition is true
18
Taking true branch
235 DP("Has pending ctors... call now\n"){};
236 for (auto &Entry : Lib.second.PendingCtors) {
237 void *Ctor = Entry;
238 int Rc = target(nullptr, Device, Ctor, CTorDTorKernelArgs, AsyncInfo);
22
Calling 'target'
239 if (Rc
18.1
'Rc' is equal to OFFLOAD_SUCCESS
19.1
'Rc' is equal to OFFLOAD_SUCCESS
20.1
'Rc' is equal to OFFLOAD_SUCCESS
!= OFFLOAD_SUCCESS(0)) {
19
Taking false branch
20
Taking false branch
21
Taking false branch
240 REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(Ctor));
241 return OFFLOAD_FAIL(~0);
242 }
243 }
244 // Clear the list to indicate that this device has been used
245 Lib.second.PendingCtors.clear();
246 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(Lib.first)){};
247 }
248 }
249 // All constructors have been issued, wait for them now.
250 if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS(0))
251 return OFFLOAD_FAIL(~0);
252 }
253 Device.HasPendingGlobals = false;
254
255 return OFFLOAD_SUCCESS(0);
256}
257
258void handleTargetOutcome(bool Success, ident_t *Loc) {
259 switch (PM->TargetOffloadPolicy) {
260 case tgt_disabled:
261 if (Success) {
262 FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled");
263 }
264 break;
265 case tgt_default:
266 FATAL_MESSAGE0(1, "default offloading policy must be switched to "
267 "mandatory or disabled");
268 break;
269 case tgt_mandatory:
270 if (!Success) {
271 if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
272 for (auto &Device : PM->Devices)
273 dumpTargetPointerMappings(Loc, *Device);
274 else
275 FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "
276 "for debugging options.\n");
277
278 if (PM->RTLs.UsedRTLs.empty()) {
279 llvm::SmallVector<llvm::StringRef> Archs;
280 llvm::transform(PM->Images, std::back_inserter(Archs),
281 [](const auto &x) {
282 return !x.second.Arch ? "empty" : x.second.Arch;
283 });
284 FAILURE_MESSAGE(
285 "No images found compatible with the installed hardware. ");
286 fprintf(stderr, "Found (%s)\n", llvm::join(Archs, ",").c_str());
287 }
288
289 SourceInfo Info(Loc);
290 if (Info.isAvailible())
291 fprintf(stderr, "%s:%d:%d: ", Info.getFilename(), Info.getLine(),
292 Info.getColumn());
293 else
294 FAILURE_MESSAGE("Source location information not present. Compile with "
295 "-g or -gline-tables-only.\n");
296 FATAL_MESSAGE0(
297 1, "failure of target construct while offloading is mandatory");
298 } else {
299 if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
300 for (auto &Device : PM->Devices)
301 dumpTargetPointerMappings(Loc, *Device);
302 }
303 break;
304 }
305}
306
307static void handleDefaultTargetOffload() {
308 std::lock_guard<decltype(PM->TargetOffloadMtx)> LG(PM->TargetOffloadMtx);
309 if (PM->TargetOffloadPolicy == tgt_default) {
310 if (omp_get_num_devices() > 0) {
311 DP("Default TARGET OFFLOAD policy is now mandatory "{}
312 "(devices were found)\n"){};
313 PM->TargetOffloadPolicy = tgt_mandatory;
314 } else {
315 DP("Default TARGET OFFLOAD policy is now disabled "{}
316 "(no devices were found)\n"){};
317 PM->TargetOffloadPolicy = tgt_disabled;
318 }
319 }
320}
321
322static bool isOffloadDisabled() {
323 if (PM->TargetOffloadPolicy == tgt_default)
324 handleDefaultTargetOffload();
325 return PM->TargetOffloadPolicy == tgt_disabled;
326}
327
328// If offload is enabled, ensure that device DeviceID has been initialized,
329// global ctors have been executed, and global data has been mapped.
330//
331// The return bool indicates if the offload is to the host device
332// There are three possible results:
333// - Return false if the target device is ready for offload
334// - Return true without reporting a runtime error if offload is
335// disabled, perhaps because the initial device was specified.
336// - Report a runtime error and return true.
337//
338// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device.
339// This step might be skipped if offload is disabled.
340bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) {
341 if (isOffloadDisabled()) {
1
Taking false branch
342 DP("Offload is disabled\n"){};
343 return true;
344 }
345
346 if (DeviceID == OFFLOAD_DEVICE_DEFAULT-1) {
2
Assuming the condition is false
3
Taking false branch
347 DeviceID = omp_get_default_device();
348 DP("Use default device id %" PRId64 "\n", DeviceID){};
349 }
350
351 // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669.
352 if (omp_get_num_devices() == 0) {
4
Assuming the condition is false
5
Taking false branch
353 DP("omp_get_num_devices() == 0 but offload is mandatory\n"){};
354 handleTargetOutcome(false, Loc);
355 return true;
356 }
357
358 if (DeviceID == omp_get_initial_device()) {
6
Assuming the condition is false
7
Taking false branch
359 DP("Device is host (%" PRId64 "), returning as if offload is disabled\n",{}
360 DeviceID){};
361 return true;
362 }
363
364 // Is device ready?
365 if (!deviceIsReady(DeviceID)) {
8
Assuming the condition is false
9
Taking false branch
366 REPORT("Device %" PRId64 " is not ready.\n", DeviceID);
367 handleTargetOutcome(false, Loc);
368 return true;
369 }
370
371 // Get device info.
372 DeviceTy &Device = *PM->Devices[DeviceID];
373
374 // Check whether global data has been mapped for this device
375 {
376 std::lock_guard<decltype(Device.PendingGlobalsMtx)> LG(
377 Device.PendingGlobalsMtx);
378 if (Device.HasPendingGlobals && initLibrary(Device) != OFFLOAD_SUCCESS(0)) {
10
Assuming field 'HasPendingGlobals' is true
11
Calling 'initLibrary'
379 REPORT("Failed to init globals on device %" PRId64 "\n", DeviceID);
380 handleTargetOutcome(false, Loc);
381 return true;
382 }
383 }
384
385 return false;
386}
387
388static int32_t getParentIndex(int64_t Type) {
389 return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
390}
391
392void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
393 const char *Name) {
394 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
395 DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size){};
396
397 if (Size <= 0) {
398 DP("Call to %s with non-positive length\n", Name){};
399 return NULL__null;
400 }
401
402 void *Rc = NULL__null;
403
404 if (DeviceNum == omp_get_initial_device()) {
405 Rc = malloc(Size);
406 DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc)){};
407 return Rc;
408 }
409
410 if (!deviceIsReady(DeviceNum)) {
411 DP("%s returns NULL ptr\n", Name){};
412 return NULL__null;
413 }
414
415 DeviceTy &Device = *PM->Devices[DeviceNum];
416 Rc = Device.allocData(Size, nullptr, Kind);
417 DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc)){};
418 return Rc;
419}
420
421void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
422 const char *Name) {
423 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
424 DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum,{}
425 DPxPTR(DevicePtr)){};
426
427 if (!DevicePtr) {
428 DP("Call to %s with NULL ptr\n", Name){};
429 return;
430 }
431
432 if (DeviceNum == omp_get_initial_device()) {
433 free(DevicePtr);
434 DP("%s deallocated host ptr\n", Name){};
435 return;
436 }
437
438 if (!deviceIsReady(DeviceNum)) {
439 DP("%s returns, nothing to do\n", Name){};
440 return;
441 }
442
443 PM->Devices[DeviceNum]->deleteData(DevicePtr, Kind);
444 DP("omp_target_free deallocated device ptr\n"){};
445}
446
447void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
448 const char *Name) {
449 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
450 DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size){};
451
452 if (Size <= 0) {
453 DP("Call to %s with non-positive length\n", Name){};
454 return NULL__null;
455 }
456
457 void *rc = NULL__null;
458
459 if (!deviceIsReady(DeviceNum)) {
460 DP("%s returns NULL ptr\n", Name){};
461 return NULL__null;
462 }
463
464 DeviceTy *DevicePtr = nullptr;
465 {
466 std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
467
468 if (!PM->Devices[DeviceNum]) {
469 DP("%s returns, device %d not available\n", Name, DeviceNum){};
470 return nullptr;
471 }
472
473 DevicePtr = PM->Devices[DeviceNum].get();
474 }
475
476 int32_t err = 0;
477 if (DevicePtr->RTL->data_lock) {
478 err = DevicePtr->RTL->data_lock(DeviceNum, HostPtr, Size, &rc);
479 if (err) {
480 DP("Could not lock ptr %p\n", HostPtr){};
481 return nullptr;
482 }
483 }
484 DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(rc)){};
485 return rc;
486}
487
488void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) {
489 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
490 DP("Call to %s for device %d unlocking\n", Name, DeviceNum){};
491
492 DeviceTy *DevicePtr = nullptr;
493 {
494 std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
495
496 // Don't check deviceIsReady as it can initialize the device if needed.
497 // Just check if DeviceNum exists as targetUnlockExplicit can be called
498 // during process exit/free (and it may have been already destroyed) and
499 // targetAllocExplicit will have already checked deviceIsReady anyway.
500 size_t DevicesSize = PM->Devices.size();
501
502 if (DevicesSize <= (size_t)DeviceNum) {
503 DP("Device ID %d does not have a matching RTL\n", DeviceNum){};
504 return;
505 }
506
507 if (!PM->Devices[DeviceNum]) {
508 DP("%s returns, device %d not available\n", Name, DeviceNum){};
509 return;
510 }
511
512 DevicePtr = PM->Devices[DeviceNum].get();
513 } // unlock RTLsMtx
514
515 if (DevicePtr->RTL->data_unlock)
516 DevicePtr->RTL->data_unlock(DeviceNum, HostPtr);
517
518 DP("%s returns\n", Name){};
519}
520
521/// Call the user-defined mapper function followed by the appropriate
522// targetData* function (targetData{Begin,End,Update}).
523int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
524 int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
525 void *ArgMapper, AsyncInfoTy &AsyncInfo,
526 TargetDataFuncPtrTy TargetDataFunction) {
527 TIMESCOPE_WITH_IDENT(Loc);
528 DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)){};
529
530 // The mapper function fills up Components.
531 MapperComponentsTy MapperComponents;
532 MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(ArgMapper);
533 (*MapperFuncPtr)((void *)&MapperComponents, ArgBase, Arg, ArgSize, ArgType,
534 ArgNames);
535
536 // Construct new arrays for args_base, args, arg_sizes and arg_types
537 // using the information in MapperComponents and call the corresponding
538 // targetData* function using these new arrays.
539 SmallVector<void *> MapperArgsBase(MapperComponents.Components.size());
540 SmallVector<void *> MapperArgs(MapperComponents.Components.size());
541 SmallVector<int64_t> MapperArgSizes(MapperComponents.Components.size());
542 SmallVector<int64_t> MapperArgTypes(MapperComponents.Components.size());
543 SmallVector<void *> MapperArgNames(MapperComponents.Components.size());
544
545 for (unsigned I = 0, E = MapperComponents.Components.size(); I < E; ++I) {
546 auto &C = MapperComponents.Components[I];
547 MapperArgsBase[I] = C.Base;
548 MapperArgs[I] = C.Begin;
549 MapperArgSizes[I] = C.Size;
550 MapperArgTypes[I] = C.Type;
551 MapperArgNames[I] = C.Name;
552 }
553
554 int Rc = TargetDataFunction(Loc, Device, MapperComponents.Components.size(),
555 MapperArgsBase.data(), MapperArgs.data(),
556 MapperArgSizes.data(), MapperArgTypes.data(),
557 MapperArgNames.data(), /*arg_mappers*/ nullptr,
558 AsyncInfo, /*FromMapper=*/true);
559
560 return Rc;
561}
562
563/// Internal function to do the mapping and transfer the data to the device
564int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
565 void **ArgsBase, void **Args, int64_t *ArgSizes,
566 int64_t *ArgTypes, map_var_info_t *ArgNames,
567 void **ArgMappers, AsyncInfoTy &AsyncInfo,
568 bool FromMapper) {
569 // process each input.
570 for (int32_t I = 0; I < ArgNum; ++I) {
571 // Ignore private variables and arrays - there is no mapping for them.
572 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
573 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
574 continue;
575
576 if (ArgMappers && ArgMappers[I]) {
577 // Instead of executing the regular path of targetDataBegin, call the
578 // targetDataMapper variant which will call targetDataBegin again
579 // with new arguments.
580 DP("Calling targetDataMapper for the %dth argument\n", I){};
581
582 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
583 int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
584 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
585 targetDataBegin);
586
587 if (Rc != OFFLOAD_SUCCESS(0)) {
588 REPORT("Call to targetDataBegin via targetDataMapper for custom mapper"
589 " failed.\n");
590 return OFFLOAD_FAIL(~0);
591 }
592
593 // Skip the rest of this function, continue to the next argument.
594 continue;
595 }
596
597 void *HstPtrBegin = Args[I];
598 void *HstPtrBase = ArgsBase[I];
599 int64_t DataSize = ArgSizes[I];
600 map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
601
602 // Adjust for proper alignment if this is a combined entry (for structs).
603 // Look at the next argument - if that is MEMBER_OF this one, then this one
604 // is a combined entry.
605 int64_t Padding = 0;
606 const int NextI = I + 1;
607 if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
608 getParentIndex(ArgTypes[NextI]) == I) {
609 int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
610 Padding = (int64_t)HstPtrBegin % Alignment;
611 if (Padding) {
612 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD{}
613 "\n",{}
614 Padding, DPxPTR(HstPtrBegin)){};
615 HstPtrBegin = (char *)HstPtrBegin - Padding;
616 DataSize += Padding;
617 }
618 }
619
620 // Address of pointer on the host and device, respectively.
621 void *PointerHstPtrBegin, *PointerTgtPtrBegin;
622 TargetPointerResultTy PointerTpr;
623 bool IsHostPtr = false;
624 bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
625 // Force the creation of a device side copy of the data when:
626 // a close map modifier was associated with a map that contained a to.
627 bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE;
628 bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
629 bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
630 // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
631 // have reached this point via __tgt_target_data_begin and not __tgt_target
632 // then no argument is marked as TARGET_PARAM ("omp target data map" is not
633 // associated with a target region, so there are no target parameters). This
634 // may be considered a hack, we could revise the scheme in the future.
635 bool UpdateRef =
636 !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0);
637
638 DeviceTy::HDTTMapAccessorTy HDTTMap =
639 Device.HostDataToTargetMap.getExclusiveAccessor();
640 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
641 DP("Has a pointer entry: \n"){};
642 // Base is address of pointer.
643 //
644 // Usually, the pointer is already allocated by this time. For example:
645 //
646 // #pragma omp target map(s.p[0:N])
647 //
648 // The map entry for s comes first, and the PTR_AND_OBJ entry comes
649 // afterward, so the pointer is already allocated by the time the
650 // PTR_AND_OBJ entry is handled below, and PointerTgtPtrBegin is thus
651 // non-null. However, "declare target link" can produce a PTR_AND_OBJ
652 // entry for a global that might not already be allocated by the time the
653 // PTR_AND_OBJ entry is handled below, and so the allocation might fail
654 // when HasPresentModifier.
655 PointerTpr = Device.getTargetPointer(
656 HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *),
657 /*HstPtrName=*/nullptr,
658 /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
659 HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo,
660 /* OwnedTPR */ nullptr, /* ReleaseHDTTMap */ false);
661 PointerTgtPtrBegin = PointerTpr.TargetPointer;
662 IsHostPtr = PointerTpr.Flags.IsHostPointer;
663 if (!PointerTgtPtrBegin) {
664 REPORT("Call to getTargetPointer returned null pointer (%s).\n",
665 HasPresentModifier ? "'present' map type modifier"
666 : "device failure or illegal mapping");
667 return OFFLOAD_FAIL(~0);
668 }
669 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"{}
670 "\n",{}
671 sizeof(void *), DPxPTR(PointerTgtPtrBegin),{}
672 (PointerTpr.Flags.IsNewEntry ? "" : " not")){};
673 PointerHstPtrBegin = HstPtrBase;
674 // modify current entry.
675 HstPtrBase = *(void **)HstPtrBase;
676 // No need to update pointee ref count for the first element of the
677 // subelement that comes from mapper.
678 UpdateRef =
679 (!FromMapper || I != 0); // subsequently update ref count of pointee
680 }
681
682 const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO;
683 const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
684 // Note that HDTTMap will be released in getTargetPointer.
685 auto TPR = Device.getTargetPointer(
686 HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo,
687 HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
688 HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry());
689 void *TgtPtrBegin = TPR.TargetPointer;
690 IsHostPtr = TPR.Flags.IsHostPointer;
691 // If data_size==0, then the argument could be a zero-length pointer to
692 // NULL, so getOrAlloc() returning NULL is not an error.
693 if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
694 REPORT("Call to getTargetPointer returned null pointer (%s).\n",
695 HasPresentModifier ? "'present' map type modifier"
696 : "device failure or illegal mapping");
697 return OFFLOAD_FAIL(~0);
698 }
699 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD{}
700 " - is%s new\n",{}
701 DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")){};
702
703 if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
704 uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
705 void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
706 DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase)){};
707 ArgsBase[I] = TgtPtrBase;
708 }
709
710 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
711
712 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
713 void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
714
715 if (PointerTpr.getEntry()->addShadowPointer(ShadowPtrInfoTy{
716 (void **)PointerHstPtrBegin, HstPtrBase,
717 (void **)PointerTgtPtrBegin, ExpectedTgtPtrBase})) {
718 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",{}
719 DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)){};
720
721 void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
722 TgtPtrBase = ExpectedTgtPtrBase;
723
724 int Ret =
725 Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, sizeof(void *),
726 AsyncInfo, PointerTpr.getEntry());
727 if (Ret != OFFLOAD_SUCCESS(0)) {
728 REPORT("Copying data to device failed.\n");
729 return OFFLOAD_FAIL(~0);
730 }
731 if (PointerTpr.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
732 OFFLOAD_SUCCESS(0))
733 return OFFLOAD_FAIL(~0);
734 }
735 }
736
737 // Check if variable can be used on the device:
738 bool IsStructMember = ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF;
739 if (getInfoLevel() & OMP_INFOTYPE_EMPTY_MAPPING && ArgTypes[I] != 0 &&
740 !IsStructMember && !IsImplicit && !TPR.isPresent() &&
741 !TPR.isContained() && !TPR.isHostPointer())
742 INFO(OMP_INFOTYPE_EMPTY_MAPPING, Device.DeviceID,
743 "variable %s does not have a valid device counterpart\n",
744 (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
745 }
746
747 return OFFLOAD_SUCCESS(0);
748}
749
750namespace {
751/// This structure contains information to deallocate a target pointer, aka.
752/// used to fix up the shadow map and potentially delete the entry from the
753/// mapping table via \p DeviceTy::deallocTgtPtr.
754struct PostProcessingInfo {
755 /// Host pointer used to look up into the map table
756 void *HstPtrBegin;
757
758 /// Size of the data
759 int64_t DataSize;
760
761 /// The mapping type (bitfield).
762 int64_t ArgType;
763
764 /// The target pointer information.
765 TargetPointerResultTy TPR;
766
767 PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType,
768 TargetPointerResultTy &&TPR)
769 : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType),
770 TPR(std::move(TPR)) {}
771};
772
773} // namespace
774
775/// Applies the necessary post-processing procedures to entries listed in \p
776/// EntriesInfo after the execution of all device side operations from a target
777/// data end. This includes the update of pointers at the host and removal of
778/// device buffer when needed. It returns OFFLOAD_FAIL or OFFLOAD_SUCCESS
779/// according to the successfulness of the operations.
780[[nodiscard]] static int
781postProcessingTargetDataEnd(DeviceTy *Device,
782 SmallVector<PostProcessingInfo> &EntriesInfo) {
783 int Ret = OFFLOAD_SUCCESS(0);
784
785 for (auto &[HstPtrBegin, DataSize, ArgType, TPR] : EntriesInfo) {
786 bool DelEntry = !TPR.isHostPointer();
787
788 // If the last element from the mapper (for end transfer args comes in
789 // reverse order), do not remove the partial entry, the parent struct still
790 // exists.
791 if ((ArgType & OMP_TGT_MAPTYPE_MEMBER_OF) &&
792 !(ArgType & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
793 DelEntry = false; // protect parent struct from being deallocated
794 }
795
796 // If we marked the entry to be deleted we need to verify no other
797 // thread reused it by now. If deletion is still supposed to happen by
798 // this thread LR will be set and exclusive access to the HDTT map
799 // will avoid another thread reusing the entry now. Note that we do
800 // not request (exclusive) access to the HDTT map if DelEntry is
801 // not set.
802 DeviceTy::HDTTMapAccessorTy HDTTMap =
803 Device->HostDataToTargetMap.getExclusiveAccessor();
804
805 // We cannot use a lock guard because we may end up deleting the mutex.
806 // We also explicitly unlocked the entry after it was put in the EntriesInfo
807 // so it can be reused.
808 TPR.getEntry()->lock();
809 auto *Entry = TPR.getEntry();
810
811 const bool IsNotLastUser = Entry->decDataEndThreadCount() != 0;
812 if (DelEntry && (Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
813 // The thread is not in charge of deletion anymore. Give up access
814 // to the HDTT map and unset the deletion flag.
815 HDTTMap.destroy();
816 DelEntry = false;
817 }
818
819 // If we copied back to the host a struct/array containing pointers,
820 // we need to restore the original host pointer values from their
821 // shadow copies. If the struct is going to be deallocated, remove any
822 // remaining shadow pointer entries for this struct.
823 const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
824 if (HasFrom) {
825 Entry->foreachShadowPointerInfo(
826 [&](const ShadowPtrInfoTy &ShadowPtr) {
827 *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
828 DP("Restoring original host pointer value " DPxMOD " for host "{}
829 "pointer " DPxMOD "\n",{}
830 DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)){};
831 return OFFLOAD_SUCCESS(0);
832 });
833 }
834
835 // Give up the lock as we either don't need it anymore (e.g., done with
836 // TPR), or erase TPR.
837 TPR.setEntry(nullptr);
838
839 if (!DelEntry)
840 continue;
841
842 Ret = Device->eraseMapEntry(HDTTMap, Entry, DataSize);
843 // Entry is already removed from the map, we can unlock it now.
844 HDTTMap.destroy();
845 Ret |= Device->deallocTgtPtrAndEntry(Entry, DataSize);
846 if (Ret != OFFLOAD_SUCCESS(0)) {
847 REPORT("Deallocating data from device failed.\n");
848 break;
849 }
850 }
851
852 delete &EntriesInfo;
853 return Ret;
854}
855
856/// Internal function to undo the mapping and retrieve the data from the device.
857int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
858 void **ArgBases, void **Args, int64_t *ArgSizes,
859 int64_t *ArgTypes, map_var_info_t *ArgNames,
860 void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
861 int Ret = OFFLOAD_SUCCESS(0);
862 auto *PostProcessingPtrs = new SmallVector<PostProcessingInfo>();
38
Memory is allocated
863 // process each input.
864 for (int32_t I = ArgNum - 1; I >= 0; --I) {
865 // Ignore private variables and arrays - there is no mapping for them.
866 // Also, ignore the use_device_ptr directive, it has no effect here.
867 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
39
Assuming the condition is false
868 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
40
Assuming the condition is false
869 continue;
870
871 if (ArgMappers && ArgMappers[I]) {
41
Assuming 'ArgMappers' is non-null
42
Assuming the condition is true
872 // Instead of executing the regular path of targetDataEnd, call the
873 // targetDataMapper variant which will call targetDataEnd again
874 // with new arguments.
875 DP("Calling targetDataMapper for the %dth argument\n", I){};
876
877 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
43
Assuming 'ArgNames' is null
44
'?' condition is true
878 Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
879 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
880 targetDataEnd);
881
882 if (Ret != OFFLOAD_SUCCESS(0)) {
45
Assuming 'Ret' is not equal to OFFLOAD_SUCCESS
46
Taking true branch
883 REPORT("Call to targetDataEnd via targetDataMapper for custom mapper"
47
Potential leak of memory pointed to by 'PostProcessingPtrs'
884 " failed.\n");
885 return OFFLOAD_FAIL(~0);
886 }
887
888 // Skip the rest of this function, continue to the next argument.
889 continue;
890 }
891
892 void *HstPtrBegin = Args[I];
893 void *HstPtrBase = ArgBases[I];
894 int64_t DataSize = ArgSizes[I];
895 // Adjust for proper alignment if this is a combined entry (for structs).
896 // Look at the next argument - if that is MEMBER_OF this one, then this one
897 // is a combined entry.
898 const int NextI = I + 1;
899 if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
900 getParentIndex(ArgTypes[NextI]) == I) {
901 int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
902 int64_t Padding = (int64_t)HstPtrBegin % Alignment;
903 if (Padding) {
904 DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD{}
905 "\n",{}
906 Padding, DPxPTR(HstPtrBegin)){};
907 HstPtrBegin = (char *)HstPtrBegin - Padding;
908 DataSize += Padding;
909 }
910 }
911
912 bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
913 bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
914 (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
915 !(FromMapper && I == 0);
916 bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
917 bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
918 bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
919
920 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
921 TargetPointerResultTy TPR =
922 Device.getTgtPtrBegin(HstPtrBegin, DataSize, UpdateRef, HasHoldModifier,
923 !IsImplicit, ForceDelete, /*FromDataEnd=*/true);
924 void *TgtPtrBegin = TPR.TargetPointer;
925 if (!TPR.isPresent() && !TPR.isHostPointer() &&
926 (DataSize || HasPresentModifier)) {
927 DP("Mapping does not exist (%s)\n",{}
928 (HasPresentModifier ? "'present' map type modifier" : "ignored")){};
929 if (HasPresentModifier) {
930 // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13:
931 // "If a map clause appears on a target, target data, target enter data
932 // or target exit data construct with a present map-type-modifier then
933 // on entry to the region if the corresponding list item does not appear
934 // in the device data environment then an error occurs and the program
935 // terminates."
936 //
937 // This should be an error upon entering an "omp target exit data". It
938 // should not be an error upon exiting an "omp target data" or "omp
939 // target". For "omp target data", Clang thus doesn't include present
940 // modifiers for end calls. For "omp target", we have not found a valid
941 // OpenMP program for which the error matters: it appears that, if a
942 // program can guarantee that data is present at the beginning of an
943 // "omp target" region so that there's no error there, that data is also
944 // guaranteed to be present at the end.
945 MESSAGE("device mapping required by 'present' map type modifier does "
946 "not exist for host address " DPxMOD " (%" PRId64 " bytes)",
947 DPxPTR(HstPtrBegin), DataSize);
948 return OFFLOAD_FAIL(~0);
949 }
950 } else {
951 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD{}
952 " - is%s last\n",{}
953 DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not")){};
954 }
955
956 // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16:
957 // "If the map clause appears on a target, target data, or target exit data
958 // construct and a corresponding list item of the original list item is not
959 // present in the device data environment on exit from the region then the
960 // list item is ignored."
961 if (!TPR.isPresent())
962 continue;
963
964 // Move data back to the host
965 const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
966 const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
967 if (HasFrom && (HasAlways || TPR.Flags.IsLast) &&
968 !TPR.Flags.IsHostPointer && DataSize != 0) {
969 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",{}
970 DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)){};
971
972 // Wait for any previous transfer if an event is present.
973 if (void *Event = TPR.getEntry()->getEvent()) {
974 if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS(0)) {
975 REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
976 return OFFLOAD_FAIL(~0);
977 }
978 }
979
980 Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
981 TPR.getEntry());
982 if (Ret != OFFLOAD_SUCCESS(0)) {
983 REPORT("Copying data from device failed.\n");
984 return OFFLOAD_FAIL(~0);
985 }
986
987 // As we are expecting to delete the entry the d2h copy might race
988 // with another one that also tries to delete the entry. This happens
989 // as the entry can be reused and the reuse might happen after the
990 // copy-back was issued but before it completed. Since the reuse might
991 // also copy-back a value we would race.
992 if (TPR.Flags.IsLast) {
993 if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
994 OFFLOAD_SUCCESS(0))
995 return OFFLOAD_FAIL(~0);
996 }
997 }
998
999 // Add pointer to the buffer for post-synchronize processing.
1000 PostProcessingPtrs->emplace_back(HstPtrBegin, DataSize, ArgTypes[I],
1001 std::move(TPR));
1002 PostProcessingPtrs->back().TPR.getEntry()->unlock();
1003 }
1004
1005 // Add post-processing functions
1006 // TODO: We might want to remove `mutable` in the future by not changing the
1007 // captured variables somehow.
1008 AsyncInfo.addPostProcessingFunction([=, Device = &Device]() mutable -> int {
1009 return postProcessingTargetDataEnd(Device, *PostProcessingPtrs);
1010 });
1011
1012 return Ret;
1013}
1014
1015static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
1016 void *HstPtrBegin, int64_t ArgSize,
1017 int64_t ArgType, AsyncInfoTy &AsyncInfo) {
1018 TIMESCOPE_WITH_IDENT(Loc);
1019 TargetPointerResultTy TPR =
1020 Device.getTgtPtrBegin(HstPtrBegin, ArgSize, /*UpdateRefCount=*/false,
1021 /*UseHoldRefCount=*/false, /*MustContain=*/true);
1022 void *TgtPtrBegin = TPR.TargetPointer;
1023 if (!TPR.isPresent()) {
1024 DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)){};
1025 if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
1026 MESSAGE("device mapping required by 'present' motion modifier does not "
1027 "exist for host address " DPxMOD " (%" PRId64 " bytes)",
1028 DPxPTR(HstPtrBegin), ArgSize);
1029 return OFFLOAD_FAIL(~0);
1030 }
1031 return OFFLOAD_SUCCESS(0);
1032 }
1033
1034 if (TPR.Flags.IsHostPointer) {
1035 DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",{}
1036 DPxPTR(HstPtrBegin)){};
1037 return OFFLOAD_SUCCESS(0);
1038 }
1039
1040 if (ArgType & OMP_TGT_MAPTYPE_TO) {
1041 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",{}
1042 ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)){};
1043 int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo,
1044 TPR.getEntry());
1045 if (Ret != OFFLOAD_SUCCESS(0)) {
1046 REPORT("Copying data to device failed.\n");
1047 return OFFLOAD_FAIL(~0);
1048 }
1049 if (TPR.getEntry()) {
1050 int Ret = TPR.getEntry()->foreachShadowPointerInfo(
1051 [&](ShadowPtrInfoTy &ShadowPtr) {
1052 DP("Restoring original target pointer value " DPxMOD " for target "{}
1053 "pointer " DPxMOD "\n",{}
1054 DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr)){};
1055 Ret = Device.submitData(ShadowPtr.TgtPtrAddr,
1056 (void *)&ShadowPtr.TgtPtrVal,
1057 sizeof(void *), AsyncInfo);
1058 if (Ret != OFFLOAD_SUCCESS(0)) {
1059 REPORT("Copying data to device failed.\n");
1060 return OFFLOAD_FAIL(~0);
1061 }
1062 return OFFLOAD_SUCCESS(0);
1063 });
1064 if (Ret != OFFLOAD_SUCCESS(0)) {
1065 DP("Updating shadow map failed\n"){};
1066 return Ret;
1067 }
1068 }
1069 }
1070
1071 if (ArgType & OMP_TGT_MAPTYPE_FROM) {
1072 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",{}
1073 ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)){};
1074 int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo,
1075 TPR.getEntry());
1076 if (Ret != OFFLOAD_SUCCESS(0)) {
1077 REPORT("Copying data from device failed.\n");
1078 return OFFLOAD_FAIL(~0);
1079 }
1080
1081 // Wait for device-to-host memcopies for whole struct to complete,
1082 // before restoring the correct host pointer.
1083 if (auto *Entry = TPR.getEntry()) {
1084 AsyncInfo.addPostProcessingFunction([=]() -> int {
1085 int Ret = Entry->foreachShadowPointerInfo(
1086 [&](const ShadowPtrInfoTy &ShadowPtr) {
1087 *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
1088 DP("Restoring original host pointer value " DPxMOD{}
1089 " for host pointer " DPxMOD "\n",{}
1090 DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)){};
1091 return OFFLOAD_SUCCESS(0);
1092 });
1093 Entry->unlock();
1094 if (Ret != OFFLOAD_SUCCESS) {
1095 DP("Updating shadow map failed\n");
1096 return Ret;
1097 }
1098 return OFFLOAD_SUCCESS;
1099 });
1100 }
1101 }
1102
1103 return OFFLOAD_SUCCESS;
1104}
1105
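The OMP_TGT_MAPTYPE_FROM branch above does not patch the shadow pointers inline; it queues the fix-up through AsyncInfo.addPostProcessingFunction so that it only runs after the device-to-host copies have been synchronized. A minimal sketch of that callback-queue pattern, using a hypothetical PostProcessQueue class rather than the real AsyncInfoTy:

#include <functional>
#include <vector>

// Illustrative only: callbacks registered while data is being handled are run
// once the asynchronous queue has been synchronized, mirroring how the shadow
// pointer restore above is deferred. Callbacks return 0 (OFFLOAD_SUCCESS) or
// ~0 (OFFLOAD_FAIL), matching the convention used in this file.
class PostProcessQueue {
  std::vector<std::function<int()>> Callbacks;

public:
  void addPostProcessingFunction(std::function<int()> Fn) {
    Callbacks.emplace_back(std::move(Fn));
  }

  // Call after synchronization; the first failing callback aborts the run.
  int runPostProcessing() {
    for (auto &Fn : Callbacks)
      if (int Ret = Fn())
        return Ret;
    Callbacks.clear();
    return 0;
  }
};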
1106static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device,
1107 void *ArgsBase,
1108 __tgt_target_non_contig *NonContig,
1109 uint64_t Size, int64_t ArgType,
1110 int CurrentDim, int DimSize, uint64_t Offset,
1111 AsyncInfoTy &AsyncInfo) {
1112 TIMESCOPE_WITH_IDENT(Loc);
1113 int Ret = OFFLOAD_SUCCESS;
1114 if (CurrentDim < DimSize) {
1115 for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
1116 uint64_t CurOffset =
1117 (NonContig[CurrentDim].Offset + I) * NonContig[CurrentDim].Stride;
1118 // we only need to transfer the first element for the last dimension
1119 // since we've already got a contiguous piece.
1120 if (CurrentDim != DimSize - 1 || I == 0) {
1121 Ret = targetDataNonContiguous(Loc, Device, ArgsBase, NonContig, Size,
1122 ArgType, CurrentDim + 1, DimSize,
1123 Offset + CurOffset, AsyncInfo);
1124 // Stop the whole process if any contiguous piece returns anything
1125 // other than OFFLOAD_SUCCESS.
1126 if (Ret != OFFLOAD_SUCCESS)
1127 return Ret;
1128 }
1129 }
1130 } else {
1131 char *Ptr = (char *)ArgsBase + Offset;
1132 DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64{}
1133 " len %" PRIu64 "\n",{}
1134 DPxPTR(Ptr), Offset, Size){};
1135 Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType,
1136 AsyncInfo);
1137 }
1138 return Ret;
1139}
1140
1141static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
1142 int32_t DimSize) {
1143 int RemovedDim = 0;
1144 for (int I = DimSize - 1; I > 0; --I) {
1145 if (NonContig[I].Count * NonContig[I].Stride == NonContig[I - 1].Stride)
1146 RemovedDim++;
1147 }
1148 return RemovedDim;
1149}
1150
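getNonContigMergedDimension above folds away trailing dimensions that are really contiguous: a dimension can be merged into its parent whenever its Count * Stride equals the parent's Stride. A small self-contained sketch; NonContigDimTy is a hypothetical stand-in for __tgt_target_non_contig, assuming only the Offset/Count/Stride fields used in this file:

#include <cassert>
#include <cstdint>

// Hypothetical per-dimension descriptor (strides in bytes).
struct NonContigDimTy {
  uint64_t Offset;
  uint64_t Count;
  uint64_t Stride;
};

static int mergedDimensions(const NonContigDimTy *NonContig, int32_t DimSize) {
  int RemovedDim = 0;
  for (int I = DimSize - 1; I > 0; --I)
    if (NonContig[I].Count * NonContig[I].Stride == NonContig[I - 1].Stride)
      ++RemovedDim;
  return RemovedDim;
}

int main() {
  // A 2-D slice of 8-byte elements: rows of 4 elements, rows 32 bytes apart.
  // The inner dimension is contiguous with its parent (4 * 8 == 32), so one
  // dimension can be dropped and each row handled as one contiguous piece.
  NonContigDimTy Desc[2] = {{/*Offset=*/0, /*Count=*/3, /*Stride=*/32},
                            {/*Offset=*/0, /*Count=*/4, /*Stride=*/8}};
  assert(mergedDimensions(Desc, 2) == 1);
  return 0;
}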
1151/// Internal function to pass data to/from the target.
1152int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
1153 void **ArgsBase, void **Args, int64_t *ArgSizes,
1154 int64_t *ArgTypes, map_var_info_t *ArgNames,
1155 void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) {
1156 // process each input.
1157 for (int32_t I = 0; I < ArgNum; ++I) {
1158 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
1159 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
1160 continue;
1161
1162 if (ArgMappers && ArgMappers[I]) {
1163 // Instead of executing the regular path of targetDataUpdate, call the
1164 // targetDataMapper variant which will call targetDataUpdate again
1165 // with new arguments.
1166 DP("Calling targetDataMapper for the %dth argument\n", I){};
1167
1168 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
1169 int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
1170 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
1171 targetDataUpdate);
1172
1173 if (Ret != OFFLOAD_SUCCESS) {
1174 REPORT("Call to targetDataUpdate via targetDataMapper for custom mapper"
1175 " failed.\n");
1176 return OFFLOAD_FAIL;
1177 }
1178
1179 // Skip the rest of this function, continue to the next argument.
1180 continue;
1181 }
1182
1183 int Ret = OFFLOAD_SUCCESS;
1184
1185 if (ArgTypes[I] & OMP_TGT_MAPTYPE_NON_CONTIG) {
1186 __tgt_target_non_contig *NonContig = (__tgt_target_non_contig *)Args[I];
1187 int32_t DimSize = ArgSizes[I];
1188 uint64_t Size =
1189 NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
1190 int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
1191 Ret = targetDataNonContiguous(
1192 Loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
1193 /*current_dim=*/0, DimSize - MergedDim, /*offset=*/0, AsyncInfo);
1194 } else {
1195 Ret = targetDataContiguous(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
1196 ArgTypes[I], AsyncInfo);
1197 }
1198 if (Ret == OFFLOAD_FAIL)
1199 return OFFLOAD_FAIL;
1200 }
1201 return OFFLOAD_SUCCESS;
1202}
1203
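For context, targetDataUpdate above is what a `target update` construct reaches at runtime: a `to` motion clause ends up in the OMP_TGT_MAPTYPE_TO branch and a `from` clause in the OMP_TGT_MAPTYPE_FROM branch of targetDataContiguous. A minimal user-level example (plain OpenMP; nothing below is specific to this runtime):

#include <cstdio>

int main() {
  double A[128] = {0};
  // Map A to the device and keep it resident.
#pragma omp target enter data map(to : A[0:128])
  // Write to the device copy.
#pragma omp target
  for (int I = 0; I < 128; ++I)
    A[I] = 2.0 * I;
  // Refresh the host copy: this motion is what the FROM path above services.
#pragma omp target update from(A[0:128])
#pragma omp target exit data map(release : A[0:128])
  std::printf("A[64] = %f\n", A[64]);
  return 0;
}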
1204static const unsigned LambdaMapping = OMP_TGT_MAPTYPE_PTR_AND_OBJ |
1205 OMP_TGT_MAPTYPE_LITERAL |
1206 OMP_TGT_MAPTYPE_IMPLICIT;
1207static bool isLambdaMapping(int64_t Mapping) {
1208 return (Mapping & LambdaMapping) == LambdaMapping;
1209}
1210
1211namespace {
1212/// Find the table information in the map or look it up in the translation
1213/// tables.
1214TableMap *getTableMap(void *HostPtr) {
1215 std::lock_guard<std::mutex> TblMapLock(PM->TblMapMtx);
1216 HostPtrToTableMapTy::iterator TableMapIt =
1217 PM->HostPtrToTableMap.find(HostPtr);
1218
1219 if (TableMapIt != PM->HostPtrToTableMap.end())
1220 return &TableMapIt->second;
1221
1222 // We don't have a map. So search all the registered libraries.
1223 TableMap *TM = nullptr;
1224 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1225 for (HostEntriesBeginToTransTableTy::iterator Itr =
1226 PM->HostEntriesBeginToTransTable.begin();
1227 Itr != PM->HostEntriesBeginToTransTable.end(); ++Itr) {
1228 // get the translation table (which contains all the good info).
1229 TranslationTable *TransTable = &Itr->second;
1230 // iterate over all the host table entries to see if we can locate the
1231 // host_ptr.
1232 __tgt_offload_entry *Cur = TransTable->HostTable.EntriesBegin;
1233 for (uint32_t I = 0; Cur < TransTable->HostTable.EntriesEnd; ++Cur, ++I) {
1234 if (Cur->addr != HostPtr)
1235 continue;
1236 // we got a match, now fill the HostPtrToTableMap so that we
1237 // may avoid this search next time.
1238 TM = &(PM->HostPtrToTableMap)[HostPtr];
1239 TM->Table = TransTable;
1240 TM->Index = I;
1241 return TM;
1242 }
1243 }
1244
1245 return nullptr;
1246}
1247
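getTableMap is a lookup-then-memoize helper: a fast path through PM->HostPtrToTableMap, and on a miss a linear scan over the registered translation tables whose result is cached for the next call. A generic sketch of the same pattern, using hypothetical names (CachedLookup, ScanForValue) rather than the runtime's own types:

#include <mutex>
#include <optional>
#include <unordered_map>

// Illustrative only: cache the results of an expensive scan keyed by pointer.
template <typename ValueTy> class CachedLookup {
  std::mutex Mtx;
  std::unordered_map<void *, ValueTy> Cache;

public:
  template <typename ScanFnTy>
  std::optional<ValueTy> lookup(void *Key, ScanFnTy ScanForValue) {
    std::lock_guard<std::mutex> Lock(Mtx);
    auto It = Cache.find(Key);
    if (It != Cache.end())
      return It->second; // fast path, like HostPtrToTableMap
    if (std::optional<ValueTy> V = ScanForValue(Key)) {
      Cache[Key] = *V; // memoize so the next lookup avoids the scan
      return V;
    }
    return std::nullopt; // like getTableMap returning nullptr
  }
};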
1248 /// A class that manages private arguments in a target region.
1249class PrivateArgumentManagerTy {
1250 /// A data structure for the information of first-private arguments. We can
1251 /// use this information to optimize data transfer by packing all
1252 /// first-private arguments and transfer them all at once.
1253 struct FirstPrivateArgInfoTy {
1254 /// Host pointer begin
1255 char *HstPtrBegin;
1256 /// Host pointer end
1257 char *HstPtrEnd;
1258 /// The index of the element in \p TgtArgs corresponding to the argument
1259 int Index;
1260 /// Alignment of the entry (base of the entry, not after the entry).
1261 uint32_t Alignment;
1262 /// Size (without alignment, see padding)
1263 uint32_t Size;
1264 /// Padding used to align this argument entry, if necessary.
1265 uint32_t Padding;
1266 /// Host pointer name
1267 map_var_info_t HstPtrName = nullptr;
1268
1269 FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size,
1270 uint32_t Alignment, uint32_t Padding,
1271 const map_var_info_t HstPtrName = nullptr)
1272 : HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
1273 HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment),
1274 Size(Size), Padding(Padding), HstPtrName(HstPtrName) {}
1275 };
1276
1277 /// A vector of target pointers for all private arguments
1278 SmallVector<void *> TgtPtrs;
1279
1280 /// A vector of information of all first-private arguments to be packed
1281 SmallVector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
1282 /// Host buffer for all arguments to be packed
1283 SmallVector<char> FirstPrivateArgBuffer;
1284 /// The total size of all arguments to be packed
1285 int64_t FirstPrivateArgSize = 0;
1286
1287 /// A reference to the \p DeviceTy object
1288 DeviceTy &Device;
1289 /// A pointer to a \p AsyncInfoTy object
1290 AsyncInfoTy &AsyncInfo;
1291
1292 // TODO: What would be the best value here? Should we make it configurable?
1293 // If the size is larger than this threshold, we will allocate and transfer it
1294 // immediately instead of packing it.
1295 static constexpr const int64_t FirstPrivateArgSizeThreshold = 1024;
1296
1297public:
1298 /// Constructor
1299 PrivateArgumentManagerTy(DeviceTy &Dev, AsyncInfoTy &AsyncInfo)
1300 : Device(Dev), AsyncInfo(AsyncInfo) {}
1301
1302 /// Add a private argument
1303 int addArg(void *HstPtr, int64_t ArgSize, int64_t ArgOffset,
1304 bool IsFirstPrivate, void *&TgtPtr, int TgtArgsIndex,
1305 const map_var_info_t HstPtrName = nullptr,
1306 const bool AllocImmediately = false) {
1307 // If the argument is not first-private, or its size is greater than a
1308 // predefined threshold, we will allocate memory and issue the transfer
1309 // immediately.
1310 if (ArgSize > FirstPrivateArgSizeThreshold || !IsFirstPrivate ||
1311 AllocImmediately) {
1312 TgtPtr = Device.allocData(ArgSize, HstPtr);
1313 if (!TgtPtr) {
1314 DP("Data allocation for %sprivate array " DPxMOD " failed.\n",{}
1315 (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtr)){};
1316 return OFFLOAD_FAIL(~0);
1317 }
1318#ifdef OMPTARGET_DEBUG
1319 void *TgtPtrBase = (void *)((intptr_t)TgtPtr + ArgOffset);
1320 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD{}
1321 " for %sprivate array " DPxMOD " - pushing target argument " DPxMOD{}
1322 "\n",{}
1323 ArgSize, DPxPTR(TgtPtr), (IsFirstPrivate ? "first-" : ""),{}
1324 DPxPTR(HstPtr), DPxPTR(TgtPtrBase)){};
1325#endif
1326 // If first-private, copy data from host
1327 if (IsFirstPrivate) {
1328 DP("Submitting firstprivate data to the device.\n"){};
1329 int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo);
1330 if (Ret != OFFLOAD_SUCCESS(0)) {
1331 DP("Copying data to device failed, failed.\n"){};
1332 return OFFLOAD_FAIL(~0);
1333 }
1334 }
1335 TgtPtrs.push_back(TgtPtr);
1336 } else {
1337 DP("Firstprivate array " DPxMOD " of size %" PRId64 " will be packed\n",{}
1338 DPxPTR(HstPtr), ArgSize){};
1339 // When reach this point, the argument must meet all following
1340 // requirements:
1341 // 1. Its size does not exceed the threshold (see the comment for
1342 // FirstPrivateArgSizeThreshold);
1343 // 2. It must be first-private (needs to be mapped to target device).
1344 // We will pack all this kind of arguments to transfer them all at once
1345 // to reduce the number of data transfer. We will not take
1346 // non-first-private arguments, aka. private arguments that doesn't need
1347 // to be mapped to target device, into account because data allocation
1348 // can be very efficient with memory manager.
1349
1350 // Placeholder value
1351 TgtPtr = nullptr;
1352 auto *LastFPArgInfo =
1353 FirstPrivateArgInfo.empty() ? nullptr : &FirstPrivateArgInfo.back();
1354
1355 // Compute the start alignment of this entry, add padding if necessary.
1356 // TODO: Consider sorting instead.
1357 uint32_t Padding = 0;
1358 uint32_t StartAlignment =
1359 LastFPArgInfo ? LastFPArgInfo->Alignment : MaxAlignment;
1360 if (LastFPArgInfo) {
1361 // Check if we keep the start alignment or if it is shrunk due to the
1362 // size of the last element.
1363 uint32_t Offset = LastFPArgInfo->Size % StartAlignment;
1364 if (Offset)
1365 StartAlignment = Offset;
1366 // We only need as much alignment as the host pointer had (since we
1367 // don't know the alignment information from the source we might end up
1368 // overaligning accesses but not too much).
1369 uint32_t RequiredAlignment =
1370 llvm::bit_floor(getPartialStructRequiredAlignment(HstPtr));
1371 if (RequiredAlignment > StartAlignment) {
1372 Padding = RequiredAlignment - StartAlignment;
1373 StartAlignment = RequiredAlignment;
1374 }
1375 }
1376
1377 FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
1378 StartAlignment, Padding, HstPtrName);
1379 FirstPrivateArgSize += Padding + ArgSize;
1380 }
1381
1382 return OFFLOAD_SUCCESS;
1383 }
1384
1385 /// Pack first-private arguments, replace placeholder pointers in \p TgtArgs,
1386 /// and start the transfer.
1387 int packAndTransfer(SmallVector<void *> &TgtArgs) {
1388 if (!FirstPrivateArgInfo.empty()) {
1389 assert(FirstPrivateArgSize != 0 &&
1390 "FirstPrivateArgSize is 0 but FirstPrivateArgInfo is empty");
1391 FirstPrivateArgBuffer.resize(FirstPrivateArgSize, 0);
1392 auto Itr = FirstPrivateArgBuffer.begin();
1393 // Copy all host data to this buffer
1394 for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
1395 // First pad the pointer as we (have to) pad it on the device too.
1396 Itr = std::next(Itr, Info.Padding);
1397 std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
1398 Itr = std::next(Itr, Info.Size);
1399 }
1400 // Allocate target memory
1401 void *TgtPtr =
1402 Device.allocData(FirstPrivateArgSize, FirstPrivateArgBuffer.data());
1403 if (TgtPtr == nullptr) {
1404 DP("Failed to allocate target memory for private arguments.\n"){};
1405 return OFFLOAD_FAIL(~0);
1406 }
1407 TgtPtrs.push_back(TgtPtr);
1408 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD "\n",{}
1409 FirstPrivateArgSize, DPxPTR(TgtPtr)){};
1410 // Transfer data to target device
1411 int Ret = Device.submitData(TgtPtr, FirstPrivateArgBuffer.data(),
1412 FirstPrivateArgSize, AsyncInfo);
1413 if (Ret != OFFLOAD_SUCCESS(0)) {
1414 DP("Failed to submit data of private arguments.\n"){};
1415 return OFFLOAD_FAIL(~0);
1416 }
1417 // Fill in all placeholder pointers
1418 auto TP = reinterpret_cast<uintptr_t>(TgtPtr);
1419 for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
1420 void *&Ptr = TgtArgs[Info.Index];
1421 assert(Ptr == nullptr && "Target pointer is already set by mistake");
1422 // Pad the device pointer to get the right alignment.
1423 TP += Info.Padding;
1424 Ptr = reinterpret_cast<void *>(TP);
1425 TP += Info.Size;
1426 DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD{}
1427 "\n",{}
1428 DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,{}
1429 DPxPTR(Ptr)){};
1430 }
1431 }
1432
1433 return OFFLOAD_SUCCESS;
1434 }
1435
1436 /// Free all target memory allocated for private arguments
1437 int free() {
1438 for (void *P : TgtPtrs) {
1439 int Ret = Device.deleteData(P);
1440 if (Ret != OFFLOAD_SUCCESS) {
1441 DP("Deallocation of (first-)private arrays failed.\n");
1442 return OFFLOAD_FAIL;
1443 }
1444 }
1445
1446 TgtPtrs.clear();
1447
1448 return OFFLOAD_SUCCESS;
1449 }
1450};
1451
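PrivateArgumentManagerTy batches every small firstprivate argument (at most FirstPrivateArgSizeThreshold bytes) into one staging buffer, padding each entry so its device address keeps the alignment the host object required, and then performs a single allocation and a single submitData for the whole batch. A simplified sketch of just the padding/offset arithmetic, using a hypothetical PackedArg record (the real class additionally records TgtArgs indices and host pointer names):

#include <cstdint>
#include <vector>

// Hypothetical record for one small firstprivate argument to be packed.
struct PackedArg {
  const void *HstPtr; // host object to copy
  uint64_t Size;      // size in bytes
  uint64_t Alignment; // alignment required on the device
  uint64_t Offset;    // filled in: offset of the argument in the packed buffer
};

// Lay the arguments out back to back, inserting padding whenever the next
// argument needs more alignment than the current buffer end provides.
static std::vector<char> packArgs(std::vector<PackedArg> &Args) {
  std::vector<char> Buffer;
  for (PackedArg &Arg : Args) {
    uint64_t Misalign = Buffer.size() % Arg.Alignment;
    if (Misalign)
      Buffer.resize(Buffer.size() + (Arg.Alignment - Misalign)); // padding
    Arg.Offset = Buffer.size();
    const char *Src = static_cast<const char *>(Arg.HstPtr);
    Buffer.insert(Buffer.end(), Src, Src + Arg.Size);
  }
  return Buffer; // one allocation + one submitData would cover all arguments
}

Each argument's device pointer is then the device address of the packed buffer plus Arg.Offset, which is essentially what packAndTransfer writes back into the placeholder slots of TgtArgs.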
1452 /// Process data before launching the kernel, including calling targetDataBegin
1453 /// to map and transfer data to the target device, and transferring
1454 /// (first-)private variables.
1455static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
1456 int32_t ArgNum, void **ArgBases, void **Args,
1457 int64_t *ArgSizes, int64_t *ArgTypes,
1458 map_var_info_t *ArgNames, void **ArgMappers,
1459 SmallVector<void *> &TgtArgs,
1460 SmallVector<ptrdiff_t> &TgtOffsets,
1461 PrivateArgumentManagerTy &PrivateArgumentManager,
1462 AsyncInfoTy &AsyncInfo) {
1463 TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc)SourceInfo SI(Loc); llvm::TimeTraceScope TimeScope("mappingBeforeTargetRegion"
, SI.getProfileLocation())
;
1464 DeviceTy &Device = *PM->Devices[DeviceId];
1465 int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
1466 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
1467 if (Ret != OFFLOAD_SUCCESS(0)) {
1468 REPORT("Call to targetDataBegin failed, abort target.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to targetDataBegin failed, abort target.\n"); } while
(0);
;
1469 return OFFLOAD_FAIL(~0);
1470 }
1471
1472 // List of (first-)private arrays allocated for this target region
1473 SmallVector<int> TgtArgsPositions(ArgNum, -1);
1474
1475 for (int32_t I = 0; I < ArgNum; ++I) {
1476 if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
1477 // This is not a target parameter, do not push it into TgtArgs.
1478 // Check for lambda mapping.
1479 if (isLambdaMapping(ArgTypes[I])) {
1480 assert((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
1481 "PTR_AND_OBJ must be also MEMBER_OF.");
1482 unsigned Idx = getParentIndex(ArgTypes[I]);
1483 int TgtIdx = TgtArgsPositions[Idx];
1484 assert(TgtIdx != -1 && "Base address must be translated already.");
1485 // The parent lambda must be processed already and it must be the last
1486 // in TgtArgs and TgtOffsets arrays.
1487 void *HstPtrVal = Args[I];
1488 void *HstPtrBegin = ArgBases[I];
1489 void *HstPtrBase = Args[Idx];
1490 void *TgtPtrBase =
1491 (void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);
1492 DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)){};
1493 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
1494 void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
1495 void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
1496 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
1497 HstPtrVal, ArgSizes[I], /*UpdateRefCount=*/false,
1498 /*UseHoldRefCount=*/false);
1499 PointerTgtPtrBegin = TPR.TargetPointer;
1500 if (!TPR.isPresent()) {
1501 DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",{}
1502 DPxPTR(HstPtrVal)){};
1503 continue;
1504 }
1505 if (TPR.Flags.IsHostPointer) {
1506 DP("Unified memory is active, no need to map lambda captured"{}
1507 "variable (" DPxMOD ")\n",{}
1508 DPxPTR(HstPtrVal)){};
1509 continue;
1510 }
1511 DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",{}
1512 DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)){};
1513 Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin,
1514 sizeof(void *), AsyncInfo, TPR.getEntry());
1515 if (Ret != OFFLOAD_SUCCESS) {
1516 REPORT("Copying data to device failed.\n");
1517 return OFFLOAD_FAIL;
1518 }
1519 }
1520 continue;
1521 }
1522 void *HstPtrBegin = Args[I];
1523 void *HstPtrBase = ArgBases[I];
1524 void *TgtPtrBegin;
1525 map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
1526 ptrdiff_t TgtBaseOffset;
1527 TargetPointerResultTy TPR;
1528 if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
1529 DP("Forwarding first-private value " DPxMOD " to the target construct\n",{}
1530 DPxPTR(HstPtrBase)){};
1531 TgtPtrBegin = HstPtrBase;
1532 TgtBaseOffset = 0;
1533 } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
1534 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
1535 const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
1536 // If there is a next argument and it depends on the current one, we need
1537 // to allocate the private memory immediately. If this is not the case,
1538 // then the argument can be marked for optimization and packed with the
1539 // other privates.
1540 const bool AllocImmediately =
1541 (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
1542 Ret = PrivateArgumentManager.addArg(
1543 HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
1544 TgtArgs.size(), HstPtrName, AllocImmediately);
1545 if (Ret != OFFLOAD_SUCCESS) {
1546 REPORT("Failed to process %sprivate argument " DPxMOD "\n",
1547 (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
1548 return OFFLOAD_FAIL;
1549 }
1550 } else {
1551 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
1552 HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
1553 TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I],
1554 /*UpdateRefCount=*/false,
1555 /*UseHoldRefCount=*/false);
1556 TgtPtrBegin = TPR.TargetPointer;
1557 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
1558#ifdef OMPTARGET_DEBUG
1559 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
1560 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",{}
1561 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin)){};
1562#endif
1563 }
1564 TgtArgsPositions[I] = TgtArgs.size();
1565 TgtArgs.push_back(TgtPtrBegin);
1566 TgtOffsets.push_back(TgtBaseOffset);
1567 }
1568
1569 assert(TgtArgs.size() == TgtOffsets.size() &&
1570 "Size mismatch in arguments and offsets");
1571
1572 // Pack and transfer first-private arguments
1573 Ret = PrivateArgumentManager.packAndTransfer(TgtArgs);
1574 if (Ret != OFFLOAD_SUCCESS) {
1575 DP("Failed to pack and transfer first private arguments\n");
1576 return OFFLOAD_FAIL;
1577 }
1578
1579 return OFFLOAD_SUCCESS;
1580}
1581
1582/// Process data after launching the kernel, including transferring data back to
1583/// host if needed and deallocating target memory of (first-)private variables.
1584static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
1585 int32_t ArgNum, void **ArgBases, void **Args,
1586 int64_t *ArgSizes, int64_t *ArgTypes,
1587 map_var_info_t *ArgNames, void **ArgMappers,
1588 PrivateArgumentManagerTy &PrivateArgumentManager,
1589 AsyncInfoTy &AsyncInfo) {
1590 TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc)SourceInfo SI(Loc); llvm::TimeTraceScope TimeScope("mappingAfterTargetRegion"
, SI.getProfileLocation())
;
1591 DeviceTy &Device = *PM->Devices[DeviceId];
1592
1593 // Move data from device.
1594 int Ret = targetDataEnd(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
37: Calling 'targetDataEnd'
1595 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
1596 if (Ret != OFFLOAD_SUCCESS) {
1597 REPORT("Call to targetDataEnd failed, abort target.\n");
1598 return OFFLOAD_FAIL;
1599 }
1600
1601 // Free target memory for private arguments after synchronization.
1602 // TODO: We might want to remove `mutable` in the future by not changing the
1603 // captured variables somehow.
1604 AsyncInfo.addPostProcessingFunction(
1605 [PrivateArgumentManager =
1606 std::move(PrivateArgumentManager)]() mutable -> int {
1607 int Ret = PrivateArgumentManager.free();
1608 if (Ret != OFFLOAD_SUCCESS) {
1609 REPORT("Failed to deallocate target memory for private args\n");
1610 return OFFLOAD_FAIL;
1611 }
1612 return Ret;
1613 });
1614
1615 return OFFLOAD_SUCCESS;
1616}
1617} // namespace
1618
1619 /// Performs the same actions as data_begin in case arg_num is
1620 /// non-zero and initiates the run of the offloaded region on the target
1621 /// platform; if arg_num is non-zero, after the region execution is done it
1622 /// also performs the same actions as data_update and data_end above. This
1623 /// function returns 0 if it was able to transfer the execution to a target
1624 /// and an integer different from zero otherwise.
1625int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
1626 KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
1627 int32_t DeviceId = Device.DeviceID;
1628 TableMap *TM = getTableMap(HostPtr);
1629 // No map for this host pointer found!
1630 if (!TM) {
23: Assuming 'TM' is non-null
24: Taking false branch
1631 REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",
1632 DPxPTR(HostPtr));
1633 return OFFLOAD_FAIL;
1634 }
1635
1636 // get target table.
1637 __tgt_target_table *TargetTable = nullptr;
1638 {
1639 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1640 assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
1641 "Not expecting a device ID outside the table's bounds!");
25: Assuming the condition is true
26: '?' condition is true
1642 TargetTable = TM->Table->TargetsTable[DeviceId];
1643 }
1644 assert(TargetTable && "Global data has not been mapped\n");
27: Assuming 'TargetTable' is non-null
28: '?' condition is true
1645
1646 DP("loop trip count is %" PRIu64 ".\n", KernelArgs.Tripcount){};
1647
1648 // We need to keep bases and offsets separate. Sometimes (e.g. in OpenCL) we
1649 // need to manifest base pointers prior to launching a kernel. Even if we have
1650 // mapped an object only partially, e.g. A[N:M], although the kernel is
1651 // expected to access elements starting at address &A[N] and beyond, we still
1652 // need to manifest the base of the array &A[0]. In other cases, e.g. the COI
1653 // API, we need the begin address itself, i.e. &A[N], as the API operates on
1654 // begin addresses, not bases. That's why we pass args and offsets as two
1655 // separate entities so that each plugin can do what it needs. This behavior
1657 // was introduced via https://reviews.llvm.org/D33028 and commit 1546d319244c.
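// Illustrative example (not from the original file): for map(tofrom: A[4:8])
// over double A[16], the begin address Args[I] is &A[4] and the base
// ArgBases[I] is &A[0], so the offset recorded in TgtOffsets is
// (intptr_t)&A[0] - (intptr_t)&A[4] = -32 bytes; a plugin that needs the
// manifested base can recompute it as TgtPtrBase = TgtPtrBegin + offset.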
1657 SmallVector<void *> TgtArgs;
1658 SmallVector<ptrdiff_t> TgtOffsets;
1659
1660 PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
1661
1662 int NumClangLaunchArgs = KernelArgs.NumArgs;
1663 int Ret = OFFLOAD_SUCCESS;
1664 if (NumClangLaunchArgs) {
29: Assuming 'NumClangLaunchArgs' is not equal to 0
30: Taking true branch
1665 // Process data, such as data mapping, before launching the kernel
1666 Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
1667 KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
1668 KernelArgs.ArgSizes, KernelArgs.ArgTypes,
1669 KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
1670 TgtOffsets, PrivateArgumentManager, AsyncInfo);
1671 if (Ret != OFFLOAD_SUCCESS) {
30.1: 'Ret' is equal to OFFLOAD_SUCCESS
31: Taking false branch
1672 REPORT("Failed to process data before launching the kernel.\n");
1673 return OFFLOAD_FAIL;
1674 }
1675
1676 // Clang might pass more values via the ArgPtrs to the runtime than we pass
1677 // on to the kernel.
1678 // TODO: Next time we adjust the KernelArgsTy we should introduce a new
1679 // NumKernelArgs field.
1680 KernelArgs.NumArgs = TgtArgs.size();
1681 }
1682
1683 // Launch device execution.
1684 void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
1685 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",{}
1686 TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index){};
1687
1688 {
1689 assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
32: '?' condition is true
1690 TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc);
1691 Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
1692 KernelArgs, AsyncInfo);
1693 }
1694
1695 if (Ret != OFFLOAD_SUCCESS) {
33: Assuming 'Ret' is equal to OFFLOAD_SUCCESS
34: Taking false branch
1696 REPORT("Executing target region abort target.\n");
1697 return OFFLOAD_FAIL;
1698 }
1699
1700 if (NumClangLaunchArgs) {
34.1: 'NumClangLaunchArgs' is not equal to 0
35: Taking true branch
1701 // Transfer data back and deallocate target memory for (first-)private
1702 // variables
1703 Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
36: Calling 'processDataAfter'
1704 KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
1705 KernelArgs.ArgSizes, KernelArgs.ArgTypes,
1706 KernelArgs.ArgNames, KernelArgs.ArgMappers,
1707 PrivateArgumentManager, AsyncInfo);
1708 if (Ret != OFFLOAD_SUCCESS) {
1709 REPORT("Failed to process data after launching the kernel.\n");
1710 return OFFLOAD_FAIL;
1711 }
1712 }
1713
1714 return OFFLOAD_SUCCESS;
1715}
1716
1717/// Executes a kernel using pre-recorded information for loading to
1718/// device memory to launch the target kernel with the pre-recorded
1719/// configuration.
1720int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
1721 void *DeviceMemory, int64_t DeviceMemorySize, void **TgtArgs,
1722 ptrdiff_t *TgtOffsets, int32_t NumArgs, int32_t NumTeams,
1723 int32_t ThreadLimit, uint64_t LoopTripCount,
1724 AsyncInfoTy &AsyncInfo) {
1725 int32_t DeviceId = Device.DeviceID;
1726 TableMap *TM = getTableMap(HostPtr);
1727 // Fail if the table map fails to find the target kernel pointer for the
1728 // provided host pointer.
1729 if (!TM) {
1730 REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host ptr " "0x%0*" "l" "x" " does not have a matching target pointer.\n"
, ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HostPtr))); } while
(0);
1731 DPxPTR(HostPtr))do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host ptr " "0x%0*" "l" "x" " does not have a matching target pointer.\n"
, ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HostPtr))); } while
(0);
;
1732 return OFFLOAD_FAIL(~0);
1733 }
1734
1735 // Retrieve the target table of offloading entries.
1736 __tgt_target_table *TargetTable = nullptr;
1737 {
1738 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1739 assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
1740 "Not expecting a device ID outside the table's bounds!");
1741 TargetTable = TM->Table->TargetsTable[DeviceId];
1742 }
1743 assert(TargetTable && "Global data has not been mapped\n");
1744
1745 // Retrieve the target kernel pointer, allocate and store the recorded device
1746 // memory data, and launch device execution.
1747 void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
1748 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",{}
1749 TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index){};
1750
1751 void *TgtPtr = Device.allocData(DeviceMemorySize, /* HstPtr */ nullptr,
1752 TARGET_ALLOC_DEFAULT);
1753 Device.submitData(TgtPtr, DeviceMemory, DeviceMemorySize, AsyncInfo);
1754
1755 KernelArgsTy KernelArgs = {0};
1756 KernelArgs.Version = 2;
1757 KernelArgs.NumArgs = NumArgs;
1758 KernelArgs.Tripcount = LoopTripCount;
1759 KernelArgs.NumTeams[0] = NumTeams;
1760 KernelArgs.ThreadLimit[0] = ThreadLimit;
1761
1762 int Ret = Device.launchKernel(TgtEntryPtr, TgtArgs, TgtOffsets, KernelArgs,
1763 AsyncInfo);
1764
1765 if (Ret != OFFLOAD_SUCCESS) {
1766 REPORT("Executing target region abort target.\n");
1767 return OFFLOAD_FAIL;
1768 }
1769
1770 return OFFLOAD_SUCCESS;
1771}