Bug Summary

File: build/source/openmp/libomptarget/src/omptarget.cpp
Warning: line 635, column 7
Value stored to 'IsHostPtr' is never read
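
The dead store reported above (flagged by the deadcode checkers enabled via -analyzer-checker=deadcode in the invocation below) occurs inside the per-argument loop of targetDataBegin(): at line 635 the loop-local flag IsHostPtr receives PointerTpr.Flags.IsHostPointer in the PTR_AND_OBJ branch, but every path then unconditionally overwrites the flag at line 662 with TPR.Flags.IsHostPointer before the first read, so the earlier store has no effect. The stand-alone sketch below reduces that control flow to its essentials; FlagsTy, ResultTy and the main() driver are illustrative stand-ins rather than the runtime's real types, and the sketch is not the upstream fix (one option would be to drop the line-635 assignment, another to consult the flag between the two writes).

    #include <cstdio>

    // Simplified stand-ins for the runtime's result type; only the flag matters here.
    struct FlagsTy { bool IsHostPointer; };
    struct ResultTy { FlagsTy Flags; };

    // Reduced control flow of targetDataBegin() around source lines 601-662.
    static bool reducedTargetDataBegin(ResultTy PointerTpr, ResultTy TPR,
                                       bool HasPtrAndObjEntry) {
      bool IsHostPtr = false;                       // line 601
      if (HasPtrAndObjEntry) {
        IsHostPtr = PointerTpr.Flags.IsHostPointer; // line 635: dead store, because...
      }
      IsHostPtr = TPR.Flags.IsHostPointer;          // line 662: ...this write is reached
                                                    // on every path before any read.
      return IsHostPtr;                             // first read of the flag
    }

    int main() {
      // The value stored in the PTR_AND_OBJ branch (true) never influences the result.
      std::printf("%d\n", reducedTargetDataBegin({{true}}, {{false}}, true)); // prints 0
      return 0;
    }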

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name omptarget.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I projects/openmp/libomptarget/src -I /build/source/openmp/libomptarget/src -I include -I /build/source/llvm/include -I projects/openmp/runtime/src -I /build/source/openmp/libomptarget/include -D OMPT_SUPPORT=1 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1674602410 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-01-25-024556-16494-1 -x c++ /build/source/openmp/libomptarget/src/omptarget.cpp
1//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the interface to be used by Clang during the codegen of a
10// target region.
11//
12//===----------------------------------------------------------------------===//
13
14#include "omptarget.h"
15#include "device.h"
16#include "private.h"
17#include "rtl.h"
18
19#include <cassert>
20#include <cstdint>
21#include <vector>
22
23using llvm::SmallVector;
24
25int AsyncInfoTy::synchronize() {
26 int Result = OFFLOAD_SUCCESS(0);
27 if (!isQueueEmpty()) {
28 switch (SyncType) {
29 case SyncTy::BLOCKING:
30 // If we have a queue we need to synchronize it now.
31 Result = Device.synchronize(*this);
32 assert(AsyncInfo.Queue == nullptr &&(static_cast <bool> (AsyncInfo.Queue == nullptr &&
"The device plugin should have nulled the queue to indicate there "
"are no outstanding actions!") ? void (0) : __assert_fail ("AsyncInfo.Queue == nullptr && \"The device plugin should have nulled the queue to indicate there \" \"are no outstanding actions!\""
, "openmp/libomptarget/src/omptarget.cpp", 34, __extension__ __PRETTY_FUNCTION__
))
33 "The device plugin should have nulled the queue to indicate there "(static_cast <bool> (AsyncInfo.Queue == nullptr &&
"The device plugin should have nulled the queue to indicate there "
"are no outstanding actions!") ? void (0) : __assert_fail ("AsyncInfo.Queue == nullptr && \"The device plugin should have nulled the queue to indicate there \" \"are no outstanding actions!\""
, "openmp/libomptarget/src/omptarget.cpp", 34, __extension__ __PRETTY_FUNCTION__
))
34 "are no outstanding actions!")(static_cast <bool> (AsyncInfo.Queue == nullptr &&
"The device plugin should have nulled the queue to indicate there "
"are no outstanding actions!") ? void (0) : __assert_fail ("AsyncInfo.Queue == nullptr && \"The device plugin should have nulled the queue to indicate there \" \"are no outstanding actions!\""
, "openmp/libomptarget/src/omptarget.cpp", 34, __extension__ __PRETTY_FUNCTION__
))
;
35 break;
36 case SyncTy::NON_BLOCKING:
37 Result = Device.queryAsync(*this);
38 break;
39 }
40 }
41
42 // Run any pending post-processing function registered on this async object.
43 if (Result == OFFLOAD_SUCCESS(0) && isQueueEmpty())
44 Result = runPostProcessing();
45
46 return Result;
47}
48
49void *&AsyncInfoTy::getVoidPtrLocation() {
50 BufferLocations.push_back(nullptr);
51 return BufferLocations.back();
52}
53
54bool AsyncInfoTy::isDone() {
55 synchronize();
56 // The async info operations are completed when the internal queue is empty.
57 return isQueueEmpty();
58}
59
60int32_t AsyncInfoTy::runPostProcessing() {
61 size_t Size = PostProcessingFunctions.size();
62 for (size_t I = 0; I < Size; ++I) {
63 const int Result = PostProcessingFunctions[I]();
64 if (Result != OFFLOAD_SUCCESS(0))
65 return Result;
66 }
67
68 // Clear the vector up until the last known function, since post-processing
69 // procedures might add new procedures themselves.
70 const auto PrevBegin = PostProcessingFunctions.begin();
71 PostProcessingFunctions.erase(PrevBegin, PrevBegin + Size);
72
73 return OFFLOAD_SUCCESS(0);
74}
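Note: a post-processing callback may itself register further callbacks (as targetDataEnd and targetDataContiguous do later in this file via addPostProcessingFunction), which is why only the first Size entries are erased above and, presumably, why the loop iterates by index rather than by iterator. A hypothetical sketch of that pattern:

    // Hypothetical illustration: a callback registered on an AsyncInfoTy that itself
    // appends another callback while runPostProcessing() is iterating.
    AsyncInfo.addPostProcessingFunction([&AsyncInfo]() -> int {
      AsyncInfo.addPostProcessingFunction([]() -> int { return OFFLOAD_SUCCESS; });
      return OFFLOAD_SUCCESS;
    });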
75
76bool AsyncInfoTy::isQueueEmpty() const { return AsyncInfo.Queue == nullptr; }
77
78/* All begin addresses for partially mapped structs must be 8-aligned in order
79 * to ensure proper alignment of members. E.g.
80 *
81 * struct S {
82 * int a; // 4-aligned
83 * int b; // 4-aligned
84 * int *p; // 8-aligned
85 * } s1;
86 * ...
87 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
88 * {
89 * s1.b = 5;
90 * for (int i...) s1.p[i] = ...;
91 * }
92 *
93 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
94 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
95 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
96 * requirements for its type. Now, when we allocate memory on the device, in
97 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
98 * This means that the chunk of the struct on the device will start at a
99 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
100 * address of p will be a misaligned 0x204 (on the host there was no need to add
101 * padding between b and p, so p comes exactly 4 bytes after b). If the device
102 * kernel tries to access s1.p, a misaligned address error occurs (as reported
103 * by the CUDA plugin). By padding the begin address down to a multiple of 8 and
104 * extending the size of the allocated chunk accordingly, the chunk on the
105 * device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
106 * &s1.p=0x208, as they should be to satisfy the alignment requirements.
107 */
108static const int64_t Alignment = 8;
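Note: the adjustment described in the comment is applied in targetDataBegin() and targetDataEnd() further down; reduced to just the arithmetic, using the comment's example address 0x104, it amounts to:

    // Sketch of the padding applied later in this file for combined struct entries.
    int64_t Padding = (int64_t)HstPtrBegin % Alignment; // 0x104 % 8 == 4
    if (Padding) {
      HstPtrBegin = (char *)HstPtrBegin - Padding;      // begin moves back to 0x100
      DataSize += Padding;                              // map the 4 padding bytes too
    }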
109
110/// Map global data and execute pending ctors
111static int initLibrary(DeviceTy &Device) {
112 /*
113 * Map global data
114 */
115 int32_t DeviceId = Device.DeviceID;
116 int Rc = OFFLOAD_SUCCESS(0);
117 bool SupportsEmptyImages = Device.RTL->supports_empty_images &&
118 Device.RTL->supports_empty_images() > 0;
119 {
120 std::lock_guard<decltype(PM->TrlTblMtx)> LG(PM->TrlTblMtx);
121 for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) {
122 TranslationTable *TransTable =
123 &PM->HostEntriesBeginToTransTable[HostEntriesBegin];
124 if (TransTable->HostTable.EntriesBegin ==
125 TransTable->HostTable.EntriesEnd &&
126 !SupportsEmptyImages) {
127 // No host entry so no need to proceed
128 continue;
129 }
130
131 if (TransTable->TargetsTable[DeviceId] != 0) {
132 // Library entries have already been processed
133 continue;
134 }
135
136 // 1) get image.
137 assert(TransTable->TargetsImages.size() > (size_t)DeviceId &&(static_cast <bool> (TransTable->TargetsImages.size(
) > (size_t)DeviceId && "Not expecting a device ID outside the table's bounds!"
) ? void (0) : __assert_fail ("TransTable->TargetsImages.size() > (size_t)DeviceId && \"Not expecting a device ID outside the table's bounds!\""
, "openmp/libomptarget/src/omptarget.cpp", 138, __extension__
__PRETTY_FUNCTION__))
138 "Not expecting a device ID outside the table's bounds!")(static_cast <bool> (TransTable->TargetsImages.size(
) > (size_t)DeviceId && "Not expecting a device ID outside the table's bounds!"
) ? void (0) : __assert_fail ("TransTable->TargetsImages.size() > (size_t)DeviceId && \"Not expecting a device ID outside the table's bounds!\""
, "openmp/libomptarget/src/omptarget.cpp", 138, __extension__
__PRETTY_FUNCTION__))
;
139 __tgt_device_image *Img = TransTable->TargetsImages[DeviceId];
140 if (!Img) {
141 REPORT("No image loaded for device id %d.\n", DeviceId)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "No image loaded for device id %d.\n", DeviceId); } while (
0);
;
142 Rc = OFFLOAD_FAIL(~0);
143 break;
144 }
145 // 2) load image into the target table.
146 __tgt_target_table *TargetTable = TransTable->TargetsTable[DeviceId] =
147 Device.loadBinary(Img);
148 // Unable to get table for this image: invalidate image and fail.
149 if (!TargetTable) {
150 REPORT("Unable to generate entries table for device id %d.\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Unable to generate entries table for device id %d.\n", DeviceId
); } while (0);
151 DeviceId)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Unable to generate entries table for device id %d.\n", DeviceId
); } while (0);
;
152 TransTable->TargetsImages[DeviceId] = 0;
153 Rc = OFFLOAD_FAIL(~0);
154 break;
155 }
156
157 // Verify whether the two table sizes match.
158 size_t Hsize =
159 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
160 size_t Tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
161
162 // Invalid image for these host entries!
163 if (Hsize != Tsize) {
164 REPORT(do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host and Target tables mismatch for device id %d [%zx != %zx].\n"
, DeviceId, Hsize, Tsize); } while (0);
165 "Host and Target tables mismatch for device id %d [%zx != %zx].\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host and Target tables mismatch for device id %d [%zx != %zx].\n"
, DeviceId, Hsize, Tsize); } while (0);
166 DeviceId, Hsize, Tsize)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host and Target tables mismatch for device id %d [%zx != %zx].\n"
, DeviceId, Hsize, Tsize); } while (0);
;
167 TransTable->TargetsImages[DeviceId] = 0;
168 TransTable->TargetsTable[DeviceId] = 0;
169 Rc = OFFLOAD_FAIL(~0);
170 break;
171 }
172
173 DeviceTy::HDTTMapAccessorTy HDTTMap =
174 Device.HostDataToTargetMap.getExclusiveAccessor();
175
176 __tgt_target_table *HostTable = &TransTable->HostTable;
177 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
178 *CurrHostEntry = HostTable->EntriesBegin,
179 *EntryDeviceEnd = TargetTable->EntriesEnd;
180 CurrDeviceEntry != EntryDeviceEnd;
181 CurrDeviceEntry++, CurrHostEntry++) {
182 if (CurrDeviceEntry->size != 0) {
183 // has data.
184 assert(CurrDeviceEntry->size == CurrHostEntry->size &&(static_cast <bool> (CurrDeviceEntry->size == CurrHostEntry
->size && "data size mismatch") ? void (0) : __assert_fail
("CurrDeviceEntry->size == CurrHostEntry->size && \"data size mismatch\""
, "openmp/libomptarget/src/omptarget.cpp", 185, __extension__
__PRETTY_FUNCTION__))
185 "data size mismatch")(static_cast <bool> (CurrDeviceEntry->size == CurrHostEntry
->size && "data size mismatch") ? void (0) : __assert_fail
("CurrDeviceEntry->size == CurrHostEntry->size && \"data size mismatch\""
, "openmp/libomptarget/src/omptarget.cpp", 185, __extension__
__PRETTY_FUNCTION__))
;
186
187 // Fortran may use multiple weak declarations for the same symbol,
188 // therefore we must allow for multiple weak symbols to be loaded from
189 // the fat binary. Treat these mappings as any other "regular"
190 // mapping. Add entry to map.
191 if (Device.getTgtPtrBegin(HDTTMap, CurrHostEntry->addr,
192 CurrHostEntry->size))
193 continue;
194
195 DP("Add mapping from host " DPxMOD " to device " DPxMOD{}
196 " with size %zu"{}
197 "\n",{}
198 DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),{}
199 CurrDeviceEntry->size){};
200 HDTTMap->emplace(new HostDataToTargetTy(
201 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
202 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
203 (uintptr_t)CurrHostEntry->addr +
204 CurrHostEntry->size /*HstPtrEnd*/,
205 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
206 false /*UseHoldRefCount*/, CurrHostEntry->name,
207 true /*IsRefCountINF*/));
208 }
209 }
210 }
211 }
212
213 if (Rc != OFFLOAD_SUCCESS(0)) {
214 return Rc;
215 }
216
217 /*
218 * Run ctors for static objects
219 */
220 if (!Device.PendingCtorsDtors.empty()) {
221 AsyncInfoTy AsyncInfo(Device);
222 // Call all ctors for all libraries registered so far
223 for (auto &Lib : Device.PendingCtorsDtors) {
224 if (!Lib.second.PendingCtors.empty()) {
225 DP("Has pending ctors... call now\n"){};
226 for (auto &Entry : Lib.second.PendingCtors) {
227 void *Ctor = Entry;
228 int Rc = target(nullptr, Device, Ctor, CTorDTorKernelArgs, AsyncInfo);
229 if (Rc != OFFLOAD_SUCCESS(0)) {
230 REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(Ctor))do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Running ctor " "0x%0*" "l" "x" " failed.\n", ((int)(2 * sizeof
(uintptr_t))), ((uintptr_t)(Ctor))); } while (0);
;
231 return OFFLOAD_FAIL(~0);
232 }
233 }
234 // Clear the list to indicate that this device has been used
235 Lib.second.PendingCtors.clear();
236 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(Lib.first)){};
237 }
238 }
239 // All constructors have been issued, wait for them now.
240 if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS(0))
241 return OFFLOAD_FAIL(~0);
242 }
243 Device.HasPendingGlobals = false;
244
245 return OFFLOAD_SUCCESS(0);
246}
247
248void handleTargetOutcome(bool Success, ident_t *Loc) {
249 switch (PM->TargetOffloadPolicy) {
250 case tgt_disabled:
251 if (Success) {
252 FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled")do { fprintf(stderr, "Libomptarget" " fatal error %d: %s\n", 1
, "expected no offloading while offloading is disabled"); abort
(); } while (0)
;
253 }
254 break;
255 case tgt_default:
256 FATAL_MESSAGE0(1, "default offloading policy must be switched to "do { fprintf(stderr, "Libomptarget" " fatal error %d: %s\n", 1
, "default offloading policy must be switched to " "mandatory or disabled"
); abort(); } while (0)
257 "mandatory or disabled")do { fprintf(stderr, "Libomptarget" " fatal error %d: %s\n", 1
, "default offloading policy must be switched to " "mandatory or disabled"
); abort(); } while (0)
;
258 break;
259 case tgt_mandatory:
260 if (!Success) {
261 if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
262 for (auto &Device : PM->Devices)
263 dumpTargetPointerMappings(Loc, *Device);
264 else
265 FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Consult https://openmp.llvm.org/design/Runtimes.html " "for debugging options.\n"
); } while (0)
266 "for debugging options.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Consult https://openmp.llvm.org/design/Runtimes.html " "for debugging options.\n"
); } while (0)
;
267
268 SourceInfo Info(Loc);
269 if (Info.isAvailible())
270 fprintf(stderrstderr, "%s:%d:%d: ", Info.getFilename(), Info.getLine(),
271 Info.getColumn());
272 else
273 FAILURE_MESSAGE("Source location information not present. Compile with "do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Source location information not present. Compile with " "-g or -gline-tables-only.\n"
); } while (0)
274 "-g or -gline-tables-only.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Source location information not present. Compile with " "-g or -gline-tables-only.\n"
); } while (0)
;
275 FATAL_MESSAGE0(do { fprintf(stderr, "Libomptarget" " fatal error %d: %s\n", 1
, "failure of target construct while offloading is mandatory"
); abort(); } while (0)
276 1, "failure of target construct while offloading is mandatory")do { fprintf(stderr, "Libomptarget" " fatal error %d: %s\n", 1
, "failure of target construct while offloading is mandatory"
); abort(); } while (0)
;
277 } else {
278 if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
279 for (auto &Device : PM->Devices)
280 dumpTargetPointerMappings(Loc, *Device);
281 }
282 break;
283 }
284}
285
286static void handleDefaultTargetOffload() {
287 std::lock_guard<decltype(PM->TargetOffloadMtx)> LG(PM->TargetOffloadMtx);
288 if (PM->TargetOffloadPolicy == tgt_default) {
289 if (omp_get_num_devices() > 0) {
290 DP("Default TARGET OFFLOAD policy is now mandatory "{}
291 "(devices were found)\n"){};
292 PM->TargetOffloadPolicy = tgt_mandatory;
293 } else {
294 DP("Default TARGET OFFLOAD policy is now disabled "{}
295 "(no devices were found)\n"){};
296 PM->TargetOffloadPolicy = tgt_disabled;
297 }
298 }
299}
300
301static bool isOffloadDisabled() {
302 if (PM->TargetOffloadPolicy == tgt_default)
303 handleDefaultTargetOffload();
304 return PM->TargetOffloadPolicy == tgt_disabled;
305}
306
307// If offload is enabled, ensure that device DeviceID has been initialized,
308// global ctors have been executed, and global data has been mapped.
309//
310// The return bool indicates if the offload is to the host device
311// There are three possible results:
312// - Return false if the target device is ready for offload
313// - Return true without reporting a runtime error if offload is
314// disabled, perhaps because the initial device was specified.
315// - Report a runtime error and return true.
316//
317// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device.
318// This step might be skipped if offload is disabled.
319bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) {
320 if (isOffloadDisabled()) {
321 DP("Offload is disabled\n"){};
322 return true;
323 }
324
325 if (DeviceID == OFFLOAD_DEVICE_DEFAULT-1) {
326 DeviceID = omp_get_default_device();
327 DP("Use default device id %" PRId64 "\n", DeviceID){};
328 }
329
330 // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669.
331 if (omp_get_num_devices() == 0) {
332 DP("omp_get_num_devices() == 0 but offload is manadatory\n"){};
333 handleTargetOutcome(false, Loc);
334 return true;
335 }
336
337 if (DeviceID == omp_get_initial_device()) {
338 DP("Device is host (%" PRId64 "), returning as if offload is disabled\n",{}
339 DeviceID){};
340 return true;
341 }
342
343 // Is device ready?
344 if (!deviceIsReady(DeviceID)) {
345 REPORT("Device %" PRId64 " is not ready.\n", DeviceID)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Device %" "l" "d" " is not ready.\n", DeviceID); } while (
0);
;
346 handleTargetOutcome(false, Loc);
347 return true;
348 }
349
350 // Get device info.
351 DeviceTy &Device = *PM->Devices[DeviceID];
352
353 // Check whether global data has been mapped for this device
354 {
355 std::lock_guard<decltype(Device.PendingGlobalsMtx)> LG(
356 Device.PendingGlobalsMtx);
357 if (Device.HasPendingGlobals && initLibrary(Device) != OFFLOAD_SUCCESS(0)) {
358 REPORT("Failed to init globals on device %" PRId64 "\n", DeviceID)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Failed to init globals on device %" "l" "d" "\n", DeviceID
); } while (0);
;
359 handleTargetOutcome(false, Loc);
360 return true;
361 }
362 }
363
364 return false;
365}
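Note: given the three outcomes listed in the comment above, a true result means "do not offload this construct". A hedged sketch of the calling pattern (the caller shown is illustrative, not quoted from the runtime's entry points):

    // Hypothetical caller: when true is returned, take the host fallback path.
    if (checkDeviceAndCtors(DeviceId, Loc)) {
      DP("Not offloading to device %" PRId64 "\n", DeviceId);
      return; // execute the host version of the construct instead
    }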
366
367static int32_t getParentIndex(int64_t Type) {
368 return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
369}
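    // Worked example of the decoding above (inferred only from this shift and the
    // getParentIndex(...) < 0 checks below): the MEMBER_OF field stores the 1-based
    // position of the parent argument, so for a member whose parent is argument 0
    //   ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1 == (0 + 1) - 1 == 0,
    // while a map type with no MEMBER_OF bits set decodes to -1 ("no parent").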
370
371void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
372 const char *Name) {
373 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
374 DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size){};
375
376 if (Size <= 0) {
377 DP("Call to %s with non-positive length\n", Name){};
378 return NULL__null;
379 }
380
381 void *Rc = NULL__null;
382
383 if (DeviceNum == omp_get_initial_device()) {
384 Rc = malloc(Size);
385 DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc)){};
386 return Rc;
387 }
388
389 if (!deviceIsReady(DeviceNum)) {
390 DP("%s returns NULL ptr\n", Name){};
391 return NULL__null;
392 }
393
394 DeviceTy &Device = *PM->Devices[DeviceNum];
395 Rc = Device.allocData(Size, nullptr, Kind);
396 DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc)){};
397 return Rc;
398}
399
400void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
401 const char *Name) {
402 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
403 DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum,{}
404 DPxPTR(DevicePtr)){};
405
406 if (!DevicePtr) {
407 DP("Call to %s with NULL ptr\n", Name){};
408 return;
409 }
410
411 if (DeviceNum == omp_get_initial_device()) {
412 free(DevicePtr);
413 DP("%s deallocated host ptr\n", Name){};
414 return;
415 }
416
417 if (!deviceIsReady(DeviceNum)) {
418 DP("%s returns, nothing to do\n", Name){};
419 return;
420 }
421
422 PM->Devices[DeviceNum]->deleteData(DevicePtr, Kind);
423 DP("omp_target_free deallocated device ptr\n"){};
424}
425
426void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
427 const char *Name) {
428 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
429 DP("Call to %s for device %d locking %zu bytes\n", Name, DeviceNum, Size){};
430
431 if (Size <= 0) {
432 DP("Call to %s with non-positive length\n", Name){};
433 return NULL__null;
434 }
435
436 void *rc = NULL__null;
437
438 if (!deviceIsReady(DeviceNum)) {
439 DP("%s returns NULL ptr\n", Name){};
440 return NULL__null;
441 }
442
443 DeviceTy *DevicePtr = nullptr;
444 {
445 std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
446
447 if (!PM->Devices[DeviceNum]) {
448 DP("%s returns, device %d not available\n", Name, DeviceNum){};
449 return nullptr;
450 }
451
452 DevicePtr = PM->Devices[DeviceNum].get();
453 }
454
455 int32_t err = 0;
456 if (DevicePtr->RTL->data_lock) {
457 err = DevicePtr->RTL->data_lock(DeviceNum, HostPtr, Size, &rc);
458 if (err) {
459 DP("Could not lock ptr %p\n", HostPtr){};
460 return nullptr;
461 }
462 }
463 DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(rc)){};
464 return rc;
465}
466
467void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) {
468 TIMESCOPE()llvm::TimeTraceScope TimeScope(__FUNCTION__);
469 DP("Call to %s for device %d unlocking\n", Name, DeviceNum){};
470
471 DeviceTy *DevicePtr = nullptr;
472 {
473 std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
474
475 // Don't check deviceIsReady as it can initialize the device if needed.
476 // Just check if DeviceNum exists as targetUnlockExplicit can be called
477 // during process exit/free (and it may have been already destroyed) and
478 // targetAllocExplicit will have already checked deviceIsReady anyway.
479 size_t DevicesSize = PM->Devices.size();
480
481 if (DevicesSize <= (size_t)DeviceNum) {
482 DP("Device ID %d does not have a matching RTL\n", DeviceNum){};
483 return;
484 }
485
486 if (!PM->Devices[DeviceNum]) {
487 DP("%s returns, device %d not available\n", Name, DeviceNum){};
488 return;
489 }
490
491 DevicePtr = PM->Devices[DeviceNum].get();
492 } // unlock RTLsMtx
493
494 if (DevicePtr->RTL->data_unlock)
495 DevicePtr->RTL->data_unlock(DeviceNum, HostPtr);
496
497 DP("%s returns\n", Name){};
498}
499
500/// Call the user-defined mapper function followed by the appropriate
501// targetData* function (targetData{Begin,End,Update}).
502int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
503 int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
504 void *ArgMapper, AsyncInfoTy &AsyncInfo,
505 TargetDataFuncPtrTy TargetDataFunction) {
506 TIMESCOPE_WITH_IDENT(Loc)SourceInfo SI(Loc); llvm::TimeTraceScope TimeScope(__FUNCTION__
, SI.getProfileLocation())
;
507 DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)){};
508
509 // The mapper function fills up Components.
510 MapperComponentsTy MapperComponents;
511 MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(ArgMapper);
512 (*MapperFuncPtr)((void *)&MapperComponents, ArgBase, Arg, ArgSize, ArgType,
513 ArgNames);
514
515 // Construct new arrays for args_base, args, arg_sizes and arg_types
516 // using the information in MapperComponents and call the corresponding
517 // targetData* function using these new arrays.
518 SmallVector<void *> MapperArgsBase(MapperComponents.Components.size());
519 SmallVector<void *> MapperArgs(MapperComponents.Components.size());
520 SmallVector<int64_t> MapperArgSizes(MapperComponents.Components.size());
521 SmallVector<int64_t> MapperArgTypes(MapperComponents.Components.size());
522 SmallVector<void *> MapperArgNames(MapperComponents.Components.size());
523
524 for (unsigned I = 0, E = MapperComponents.Components.size(); I < E; ++I) {
525 auto &C = MapperComponents.Components[I];
526 MapperArgsBase[I] = C.Base;
527 MapperArgs[I] = C.Begin;
528 MapperArgSizes[I] = C.Size;
529 MapperArgTypes[I] = C.Type;
530 MapperArgNames[I] = C.Name;
531 }
532
533 int Rc = TargetDataFunction(Loc, Device, MapperComponents.Components.size(),
534 MapperArgsBase.data(), MapperArgs.data(),
535 MapperArgSizes.data(), MapperArgTypes.data(),
536 MapperArgNames.data(), /*arg_mappers*/ nullptr,
537 AsyncInfo, /*FromMapper=*/true);
538
539 return Rc;
540}
541
542/// Internal function to do the mapping and transfer the data to the device
543int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
544 void **ArgsBase, void **Args, int64_t *ArgSizes,
545 int64_t *ArgTypes, map_var_info_t *ArgNames,
546 void **ArgMappers, AsyncInfoTy &AsyncInfo,
547 bool FromMapper) {
548 // process each input.
549 for (int32_t I = 0; I < ArgNum; ++I) {
550 // Ignore private variables and arrays - there is no mapping for them.
551 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
552 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
553 continue;
554
555 if (ArgMappers && ArgMappers[I]) {
556 // Instead of executing the regular path of targetDataBegin, call the
557 // targetDataMapper variant which will call targetDataBegin again
558 // with new arguments.
559 DP("Calling targetDataMapper for the %dth argument\n", I){};
560
561 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
562 int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
563 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
564 targetDataBegin);
565
566 if (Rc != OFFLOAD_SUCCESS(0)) {
567 REPORT("Call to targetDataBegin via targetDataMapper for custom mapper"do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to targetDataBegin via targetDataMapper for custom mapper"
" failed.\n"); } while (0);
568 " failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to targetDataBegin via targetDataMapper for custom mapper"
" failed.\n"); } while (0);
;
569 return OFFLOAD_FAIL(~0);
570 }
571
572 // Skip the rest of this function, continue to the next argument.
573 continue;
574 }
575
576 void *HstPtrBegin = Args[I];
577 void *HstPtrBase = ArgsBase[I];
578 int64_t DataSize = ArgSizes[I];
579 map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
580
581 // Adjust for proper alignment if this is a combined entry (for structs).
582 // Look at the next argument - if that is MEMBER_OF this one, then this one
583 // is a combined entry.
584 int64_t Padding = 0;
585 const int NextI = I + 1;
586 if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
587 getParentIndex(ArgTypes[NextI]) == I) {
588 Padding = (int64_t)HstPtrBegin % Alignment;
589 if (Padding) {
590 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD{}
591 "\n",{}
592 Padding, DPxPTR(HstPtrBegin)){};
593 HstPtrBegin = (char *)HstPtrBegin - Padding;
594 DataSize += Padding;
595 }
596 }
597
598 // Address of pointer on the host and device, respectively.
599 void *PointerHstPtrBegin, *PointerTgtPtrBegin;
600 TargetPointerResultTy PointerTpr;
601 bool IsHostPtr = false;
602 bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
603 // Force the creation of a device side copy of the data when:
604 // a close map modifier was associated with a map that contained a to.
605 bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE;
606 bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
607 bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
608 // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
609 // have reached this point via __tgt_target_data_begin and not __tgt_target
610 // then no argument is marked as TARGET_PARAM ("omp target data map" is not
611 // associated with a target region, so there are no target parameters). This
612 // may be considered a hack, we could revise the scheme in the future.
613 bool UpdateRef =
614 !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0);
615 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
616 DP("Has a pointer entry: \n"){};
617 // Base is address of pointer.
618 //
619 // Usually, the pointer is already allocated by this time. For example:
620 //
621 // #pragma omp target map(s.p[0:N])
622 //
623 // The map entry for s comes first, and the PTR_AND_OBJ entry comes
624 // afterward, so the pointer is already allocated by the time the
625 // PTR_AND_OBJ entry is handled below, and PointerTgtPtrBegin is thus
626 // non-null. However, "declare target link" can produce a PTR_AND_OBJ
627 // entry for a global that might not already be allocated by the time the
628 // PTR_AND_OBJ entry is handled below, and so the allocation might fail
629 // when HasPresentModifier.
630 PointerTpr = Device.getTargetPointer(
631 HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr,
632 /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
633 HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo);
634 PointerTgtPtrBegin = PointerTpr.TargetPointer;
635 IsHostPtr = PointerTpr.Flags.IsHostPointer;
Value stored to 'IsHostPtr' is never read
636 if (!PointerTgtPtrBegin) {
637 REPORT("Call to getTargetPointer returned null pointer (%s).\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
638 HasPresentModifier ? "'present' map type modifier"do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
639 : "device failure or illegal mapping")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
;
640 return OFFLOAD_FAIL(~0);
641 }
642 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"{}
643 "\n",{}
644 sizeof(void *), DPxPTR(PointerTgtPtrBegin),{}
645 (PointerTpr.Flags.IsNewEntry ? "" : " not")){};
646 PointerHstPtrBegin = HstPtrBase;
647 // modify current entry.
648 HstPtrBase = *(void **)HstPtrBase;
649 // No need to update pointee ref count for the first element of the
650 // subelement that comes from mapper.
651 UpdateRef =
652 (!FromMapper || I != 0); // subsequently update ref count of pointee
653 }
654
655 const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO;
656 const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
657 auto TPR = Device.getTargetPointer(
658 HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, HasFlagAlways,
659 IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier,
660 HasHoldModifier, AsyncInfo);
661 void *TgtPtrBegin = TPR.TargetPointer;
662 IsHostPtr = TPR.Flags.IsHostPointer;
663 // If data_size==0, then the argument could be a zero-length pointer to
664 // NULL, so getOrAlloc() returning NULL is not an error.
665 if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
666 REPORT("Call to getTargetPointer returned null pointer (%s).\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
667 HasPresentModifier ? "'present' map type modifier"do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
668 : "device failure or illegal mapping")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
;
669 return OFFLOAD_FAIL(~0);
670 }
671 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD{}
672 " - is%s new\n",{}
673 DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")){};
674
675 if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
676 uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
677 void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
678 DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase)){};
679 ArgsBase[I] = TgtPtrBase;
680 }
681
682 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
683 // Check whether we need to update the pointer on the device
684 bool UpdateDevPtr = false;
685
686 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
687 void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
688
689 Device.ShadowMtx.lock();
690 auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin);
691 // If this pointer is not in the map we need to insert it. If the map
692 // contains a stale entry, we need to update it (e.g. if the pointee was
693 // deallocated and later on is reallocated at another device address). The
694 // latter scenario is the subject of LIT test env/base_ptr_ref_count.c. An
695 // entry is removed from ShadowPtrMap only when the PTR of a PTR_AND_OBJ
696 // pair is deallocated, not when the OBJ is deallocated. In
697 // env/base_ptr_ref_count.c the PTR is a global "declare target" pointer,
698 // so it stays in the map for the lifetime of the application. When the
699 // OBJ is deallocated and later on allocated again (at a different device
700 // address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin
701 // which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ.
702 if (Entry == Device.ShadowPtrMap.end() ||
703 Entry->second.TgtPtrVal != ExpectedTgtPtrBase) {
704 // create or update shadow pointers for this entry
705 Device.ShadowPtrMap[PointerHstPtrBegin] = {
706 HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
707 PointerTpr.Entry->setMayContainAttachedPointers();
708 UpdateDevPtr = true;
709 }
710
711 if (UpdateDevPtr) {
712 std::lock_guard<decltype(*PointerTpr.Entry)> LG(*PointerTpr.Entry);
713 Device.ShadowMtx.unlock();
714
715 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",{}
716 DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)){};
717
718 void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
719 TgtPtrBase = ExpectedTgtPtrBase;
720
721 int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
722 sizeof(void *), AsyncInfo);
723 if (Ret != OFFLOAD_SUCCESS(0)) {
724 REPORT("Copying data to device failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Copying data to device failed.\n"); } while (0);
;
725 return OFFLOAD_FAIL(~0);
726 }
727 if (PointerTpr.Entry->addEventIfNecessary(Device, AsyncInfo) !=
728 OFFLOAD_SUCCESS(0))
729 return OFFLOAD_FAIL(~0);
730 } else
731 Device.ShadowMtx.unlock();
732 }
733 }
734
735 return OFFLOAD_SUCCESS(0);
736}
737
738namespace {
739/// This structure contains information to deallocate a target pointer, aka.
740/// used to fix up the shadow map and potentially delete the entry from the
741/// mapping table via \p DeviceTy::deallocTgtPtr.
742struct PostProcessingInfo {
743 /// Host pointer used to look up into the map table
744 void *HstPtrBegin;
745
746 /// Size of the data
747 int64_t DataSize;
748
749 /// The mapping type (bitfield).
750 int64_t ArgType;
751
752 /// Index of the argument in the data mapping scheme.
753 int32_t ArgIndex;
754
755 /// The target pointer information.
756 TargetPointerResultTy TPR;
757
758 PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType,
759 int32_t ArgIndex, TargetPointerResultTy TPR)
760 : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType),
761 ArgIndex(ArgIndex), TPR(TPR) {}
762};
763
764/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
765/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the
766/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the
767/// rest of the map is not checked anymore.
768template <typename CBTy>
769static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
770 uintptr_t Size,
771 const TargetPointerResultTy &TPR) {
772 // If we have an object that is too small to hold a pointer subobject, no need
773 // to do any checking.
774 if (Size < sizeof(void *))
775 return;
776
777 // If the map entry for the object was never marked as containing attached
778 // pointers, no need to do any checking.
779 if (!TPR.Entry || !TPR.Entry->getMayContainAttachedPointers())
780 return;
781
782 uintptr_t LB = (uintptr_t)Begin;
783 uintptr_t UB = LB + Size;
784 // Now we are looking into the shadow map so we need to lock it.
785 std::lock_guard<decltype(Device.ShadowMtx)> LG(Device.ShadowMtx);
786 for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
787 Itr != Device.ShadowPtrMap.end();) {
788 uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first;
789
790 // An STL map is sorted on its keys; use this property
791 // to quickly determine when to break out of the loop.
792 if (ShadowHstPtrAddr < LB) {
793 ++Itr;
794 continue;
795 }
796 if (ShadowHstPtrAddr >= UB)
797 break;
798
799 if (CB(Itr) == OFFLOAD_FAIL(~0))
800 break;
801 }
802}
803
804} // namespace
805
806/// Applies the necessary post-processing procedures to entries listed in \p
807/// EntriesInfo after the execution of all device side operations from a target
808/// data end. This includes the update of pointers at the host and removal of
809/// device buffer when needed. It returns OFFLOAD_FAIL or OFFLOAD_SUCCESS
810/// according to the successfulness of the operations.
811[[nodiscard]] static int
812postProcessingTargetDataEnd(DeviceTy *Device,
813 SmallVector<PostProcessingInfo> EntriesInfo,
814 bool FromMapper) {
815 int Ret = OFFLOAD_SUCCESS(0);
816 void *FromMapperBase = nullptr;
817
818 for (auto &[HstPtrBegin, DataSize, ArgType, ArgIndex, TPR] : EntriesInfo) {
819 bool DelEntry = !TPR.isHostPointer();
820
821 // If the last element from the mapper (for end transfer args comes in
822 // reverse order), do not remove the partial entry, the parent struct still
823 // exists.
824 if ((ArgType & OMP_TGT_MAPTYPE_MEMBER_OF) &&
825 !(ArgType & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
826 DelEntry = false; // protect parent struct from being deallocated
827 }
828
829 if (DelEntry && FromMapper && ArgIndex == 0) {
830 DelEntry = false;
831 FromMapperBase = HstPtrBegin;
832 }
833
834 // If we marked the entry to be deleted we need to verify no other
835 // thread reused it by now. If deletion is still supposed to happen by
836 // this thread LR will be set and exclusive access to the HDTT map
837 // will avoid another thread reusing the entry now. Note that we do
838 // not request (exclusive) access to the HDTT map if DelEntry is
839 // not set.
840 DeviceTy::HDTTMapAccessorTy HDTTMap =
841 Device->HostDataToTargetMap.getExclusiveAccessor(!DelEntry);
842
843 const bool IsNotLastUser = TPR.Entry->decDataEndThreadCount() != 0;
844 if (DelEntry && (TPR.Entry->getTotalRefCount() != 0 || IsNotLastUser)) {
845 // The thread is not in charge of deletion anymore. Give up access
846 // to the HDTT map and unset the deletion flag.
847 HDTTMap.destroy();
848 DelEntry = false;
849 }
850
851 // If we copied back to the host a struct/array containing pointers,
852 // we need to restore the original host pointer values from their
853 // shadow copies. If the struct is going to be deallocated, remove any
854 // remaining shadow pointer entries for this struct.
855 const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
856 auto CB = [&](ShadowPtrListTy::iterator &Itr) {
857 // If we copied the struct to the host, we need to restore the
858 // pointer.
859 if (HasFrom) {
860 void **ShadowHstPtrAddr = (void **)Itr->first;
861 *ShadowHstPtrAddr = Itr->second.HstPtrVal;
862 DP("Restoring original host pointer value " DPxMOD " for host "{}
863 "pointer " DPxMOD "\n",{}
864 DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)){};
865 }
866 // If the struct is to be deallocated, remove the shadow entry.
867 if (DelEntry) {
868 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)){};
869 auto OldItr = Itr;
870 Itr++;
871 Device->ShadowPtrMap.erase(OldItr);
872 } else {
873 ++Itr;
874 }
875 return OFFLOAD_SUCCESS(0);
876 };
877 applyToShadowMapEntries(*Device, CB, HstPtrBegin, DataSize, TPR);
878
879 if (!DelEntry || (FromMapperBase && FromMapperBase == HstPtrBegin))
880 continue;
881
882 // If we are deleting the entry the DataMapMtx is locked and we own
883 // the entry.
884 Ret = Device->eraseMapEntry(HDTTMap, TPR.Entry, DataSize);
885 // Entry is already removed from the map, we can unlock it now.
886 HDTTMap.destroy();
887 Ret |= Device->deallocTgtPtrAndEntry(TPR.Entry, DataSize);
888 if (Ret != OFFLOAD_SUCCESS(0)) {
889 REPORT("Deallocating data from device failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Deallocating data from device failed.\n"); } while (0);
;
890 break;
891 }
892 }
893
894 return Ret;
895}
896
897/// Internal function to undo the mapping and retrieve the data from the device.
898int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
899 void **ArgBases, void **Args, int64_t *ArgSizes,
900 int64_t *ArgTypes, map_var_info_t *ArgNames,
901 void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
902 int Ret = OFFLOAD_SUCCESS(0);
903 SmallVector<PostProcessingInfo> PostProcessingPtrs;
904 void *FromMapperBase = nullptr;
905 // process each input.
906 for (int32_t I = ArgNum - 1; I >= 0; --I) {
907 // Ignore private variables and arrays - there is no mapping for them.
908 // Also, ignore the use_device_ptr directive, it has no effect here.
909 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
910 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
911 continue;
912
913 if (ArgMappers && ArgMappers[I]) {
914 // Instead of executing the regular path of targetDataEnd, call the
915 // targetDataMapper variant which will call targetDataEnd again
916 // with new arguments.
917 DP("Calling targetDataMapper for the %dth argument\n", I){};
918
919 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
920 Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
921 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
922 targetDataEnd);
923
924 if (Ret != OFFLOAD_SUCCESS(0)) {
925 REPORT("Call to targetDataEnd via targetDataMapper for custom mapper"do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to targetDataEnd via targetDataMapper for custom mapper"
" failed.\n"); } while (0);
926 " failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to targetDataEnd via targetDataMapper for custom mapper"
" failed.\n"); } while (0);
;
927 return OFFLOAD_FAIL(~0);
928 }
929
930 // Skip the rest of this function, continue to the next argument.
931 continue;
932 }
933
934 void *HstPtrBegin = Args[I];
935 int64_t DataSize = ArgSizes[I];
936 // Adjust for proper alignment if this is a combined entry (for structs).
937 // Look at the next argument - if that is MEMBER_OF this one, then this one
938 // is a combined entry.
939 const int NextI = I + 1;
940 if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
941 getParentIndex(ArgTypes[NextI]) == I) {
942 int64_t Padding = (int64_t)HstPtrBegin % Alignment;
943 if (Padding) {
944 DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD{}
945 "\n",{}
946 Padding, DPxPTR(HstPtrBegin)){};
947 HstPtrBegin = (char *)HstPtrBegin - Padding;
948 DataSize += Padding;
949 }
950 }
951
952 bool IsLast, IsHostPtr;
953 bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
954 bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
955 (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
956 !(FromMapper && I == 0);
957 bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
958 bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
959 bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
960
961 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
962 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
963 HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
964 !IsImplicit, ForceDelete, /*FromDataEnd=*/true);
965 void *TgtPtrBegin = TPR.TargetPointer;
966 if (!TPR.isPresent() && !TPR.isHostPointer() &&
967 (DataSize || HasPresentModifier)) {
968 DP("Mapping does not exist (%s)\n",{}
969 (HasPresentModifier ? "'present' map type modifier" : "ignored")){};
970 if (HasPresentModifier) {
971 // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13:
972 // "If a map clause appears on a target, target data, target enter data
973 // or target exit data construct with a present map-type-modifier then
974 // on entry to the region if the corresponding list item does not appear
975 // in the device data environment then an error occurs and the program
976 // terminates."
977 //
978 // This should be an error upon entering an "omp target exit data". It
979 // should not be an error upon exiting an "omp target data" or "omp
980 // target". For "omp target data", Clang thus doesn't include present
981 // modifiers for end calls. For "omp target", we have not found a valid
982 // OpenMP program for which the error matters: it appears that, if a
983 // program can guarantee that data is present at the beginning of an
984 // "omp target" region so that there's no error there, that data is also
985 // guaranteed to be present at the end.
986 MESSAGE("device mapping required by 'present' map type modifier does "do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' map type modifier does "
"not exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), DataSize); } while (0)
987 "not exist for host address " DPxMOD " (%" PRId64 " bytes)",do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' map type modifier does "
"not exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), DataSize); } while (0)
988 DPxPTR(HstPtrBegin), DataSize)do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' map type modifier does "
"not exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), DataSize); } while (0)
;
989 return OFFLOAD_FAIL(~0);
990 }
991 } else {
992 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD{}
993 " - is%s last\n",{}
994 DataSize, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not")){};
995 }
996
997 // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16:
998 // "If the map clause appears on a target, target data, or target exit data
999 // construct and a corresponding list item of the original list item is not
1000 // present in the device data environment on exit from the region then the
1001 // list item is ignored."
1002 if (!TPR.isPresent())
1003 continue;
1004
1005 // Move data back to the host
1006 const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
1007 const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
1008 if (HasFrom && (HasAlways || IsLast) && !IsHostPtr) {
1009 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",{}
1010 DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)){};
1011
1012 std::lock_guard<decltype(*TPR.Entry)> LG(*TPR.Entry);
1013 // Wait for any previous transfer if an event is present.
1014 if (void *Event = TPR.Entry->getEvent()) {
1015 if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS(0)) {
1016 REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event))do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Failed to wait for event " "0x%0*" "l" "x" ".\n", ((int)(2
* sizeof(uintptr_t))), ((uintptr_t)(Event))); } while (0);
;
1017 return OFFLOAD_FAIL(~0);
1018 }
1019 }
1020
1021 Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo);
1022 if (Ret != OFFLOAD_SUCCESS(0)) {
1023 REPORT("Copying data from device failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Copying data from device failed.\n"); } while (0);
;
1024 return OFFLOAD_FAIL(~0);
1025 }
1026
1027 // As we are expecting to delete the entry the d2h copy might race
1028 // with another one that also tries to delete the entry. This happens
1029 // as the entry can be reused and the reuse might happen after the
1030 // copy-back was issued but before it completed. Since the reuse might
1031 // also copy-back a value we would race.
1032 if (IsLast) {
1033 if (TPR.Entry->addEventIfNecessary(Device, AsyncInfo) !=
1034 OFFLOAD_SUCCESS(0))
1035 return OFFLOAD_FAIL(~0);
1036 }
1037 }
1038
1039 // Add pointer to the buffer for post-synchronize processing.
1040 PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I, TPR);
1041 }
1042
1043 // Add post-processing functions
1044 // TODO: We might want to remove `mutable` in the future by not changing the
1045 // captured variables somehow.
1046 AsyncInfo.addPostProcessingFunction(
1047 [=, Device = &Device,
1048 PostProcessingPtrs = std::move(PostProcessingPtrs)]() mutable -> int {
1049 return postProcessingTargetDataEnd(Device, PostProcessingPtrs,
1050 FromMapperBase);
1051 });
1052
1053 return Ret;
1054}
1055
1056static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
1057 void *HstPtrBegin, int64_t ArgSize,
1058 int64_t ArgType, AsyncInfoTy &AsyncInfo) {
1059 TIMESCOPE_WITH_IDENT(Loc)SourceInfo SI(Loc); llvm::TimeTraceScope TimeScope(__FUNCTION__
, SI.getProfileLocation())
;
1060 bool IsLast, IsHostPtr;
1061 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
1062 HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false,
1063 /*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true);
1064 void *TgtPtrBegin = TPR.TargetPointer;
1065 if (!TPR.isPresent()) {
1066 DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)){};
1067 if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
1068 MESSAGE("device mapping required by 'present' motion modifier does not "do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' motion modifier does not "
"exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), ArgSize); } while (0)
1069 "exist for host address " DPxMOD " (%" PRId64 " bytes)",do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' motion modifier does not "
"exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), ArgSize); } while (0)
1070 DPxPTR(HstPtrBegin), ArgSize)do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' motion modifier does not "
"exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), ArgSize); } while (0)
;
1071 return OFFLOAD_FAIL(~0);
1072 }
1073 return OFFLOAD_SUCCESS(0);
1074 }
1075
1076 if (IsHostPtr) {
1077 DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",{}
1078 DPxPTR(HstPtrBegin)){};
1079 return OFFLOAD_SUCCESS(0);
1080 }
1081
1082 if (ArgType & OMP_TGT_MAPTYPE_FROM) {
1083 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",{}
1084 ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)){};
1085 int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo);
1086 if (Ret != OFFLOAD_SUCCESS(0)) {
1087 REPORT("Copying data from device failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Copying data from device failed.\n"); } while (0);
;
1088 return OFFLOAD_FAIL(~0);
1089 }
1090
1091 // Wait for device-to-host memcopies for whole struct to complete,
1092 // before restoring the correct host pointer.
1093 AsyncInfo.addPostProcessingFunction([=, Device = &Device]() -> int {
1094 auto CB = [&](ShadowPtrListTy::iterator &Itr) {
1095 void **ShadowHstPtrAddr = (void **)Itr->first;
1096 *ShadowHstPtrAddr = Itr->second.HstPtrVal;
1097 DP("Restoring original host pointer value " DPxMOD{}
1098 " for host pointer " DPxMOD "\n",{}
1099 DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)){};
1100 ++Itr;
1101 return OFFLOAD_SUCCESS(0);
1102 };
1103 applyToShadowMapEntries(*Device, CB, HstPtrBegin, ArgSize, TPR);
1104
1105 return OFFLOAD_SUCCESS(0);
1106 });
1107 }
1108
1109 if (ArgType & OMP_TGT_MAPTYPE_TO) {
1110 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",{}
1111 ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)){};
1112 int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo);
1113 if (Ret != OFFLOAD_SUCCESS(0)) {
1114 REPORT("Copying data to device failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Copying data to device failed.\n"); } while (0);
;
1115 return OFFLOAD_FAIL(~0);
1116 }
1117
1118 auto CB = [&](ShadowPtrListTy::iterator &Itr) {
1119 DP("Restoring original target pointer value " DPxMOD " for target "{}
1120 "pointer " DPxMOD "\n",{}
1121 DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr)){};
1122 Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal,
1123 sizeof(void *), AsyncInfo);
1124 if (Ret != OFFLOAD_SUCCESS(0))
1125 REPORT("Copying data to device failed.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Copying data to device failed.\n"); } while (0);
;
1126 ++Itr;
1127 return Ret;
1128 };
1129 applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
1130 }
1131 return OFFLOAD_SUCCESS(0);
1132}
1133
1134static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device,
1135 void *ArgsBase,
1136 __tgt_target_non_contig *NonContig,
1137 uint64_t Size, int64_t ArgType,
1138 int CurrentDim, int DimSize, uint64_t Offset,
1139 AsyncInfoTy &AsyncInfo) {
1140 TIMESCOPE_WITH_IDENT(Loc);
1141 int Ret = OFFLOAD_SUCCESS;
1142 if (CurrentDim < DimSize) {
1143 for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
1144 uint64_t CurOffset =
1145 (NonContig[CurrentDim].Offset + I) * NonContig[CurrentDim].Stride;
1146 // we only need to transfer the first element for the last dimension
1147 // since we've already got a contiguous piece.
1148 if (CurrentDim != DimSize - 1 || I == 0) {
1149 Ret = targetDataNonContiguous(Loc, Device, ArgsBase, NonContig, Size,
1150 ArgType, CurrentDim + 1, DimSize,
1151 Offset + CurOffset, AsyncInfo);
1152 // Stop the whole process if any contiguous piece returns anything
1153 // other than OFFLOAD_SUCCESS.
1154 if (Ret != OFFLOAD_SUCCESS)
1155 return Ret;
1156 }
1157 }
1158 } else {
1159 char *Ptr = (char *)ArgsBase + Offset;
1160 DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64{}
1161 " len %" PRIu64 "\n",{}
1162 DPxPTR(Ptr), Offset, Size){};
1163 Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType,
1164 AsyncInfo);
1165 }
1166 return Ret;
1167}
1168
1169static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
1170 int32_t DimSize) {
1171 int RemovedDim = 0;
1172 for (int I = DimSize - 1; I > 0; --I) {
1173 if (NonContig[I].Count * NonContig[I].Stride == NonContig[I - 1].Stride)
1174 RemovedDim++;
1175 }
1176 return RemovedDim;
1177}
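
getNonContigMergedDimension folds a dimension of the non-contiguous descriptor into its parent whenever the inner dimension's Count * Stride equals the outer dimension's Stride, i.e. the inner block tiles the outer stride exactly, so the pair can be moved as one contiguous piece. A small worked example of that check follows; DimDesc is an illustrative stand-in for the Offset/Count/Stride triple of __tgt_target_non_contig, not the real type.

#include <cstdint>
#include <iostream>

// Hypothetical descriptor mirroring an Offset/Count/Stride triple per
// dimension (names and layout are illustrative only).
struct DimDesc {
  uint64_t Offset;
  uint64_t Count;
  uint64_t Stride; // in bytes
};

// Same idea as getNonContigMergedDimension: starting from the innermost
// dimension, a dimension can be folded into its parent when the block it
// spans (Count * Stride) exactly equals the parent's Stride.
static int mergedDimensions(const DimDesc *Dims, int32_t DimSize) {
  int Removed = 0;
  for (int I = DimSize - 1; I > 0; --I)
    if (Dims[I].Count * Dims[I].Stride == Dims[I - 1].Stride)
      ++Removed;
  return Removed;
}

int main() {
  // A 4x8 row-major block of doubles where whole rows are transferred: the
  // inner dimension (8 elements of 8 bytes) covers the outer stride (64), so
  // the two dimensions collapse into one contiguous transfer.
  DimDesc Dims[2] = {{0, 4, 64}, {0, 8, 8}};
  std::cout << "mergeable dimensions: " << mergedDimensions(Dims, 2) << "\n";
  return 0;
}
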
1178
1179/// Internal function to pass data to/from the target.
1180int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
1181 void **ArgsBase, void **Args, int64_t *ArgSizes,
1182 int64_t *ArgTypes, map_var_info_t *ArgNames,
1183 void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) {
1184 // process each input.
1185 for (int32_t I = 0; I < ArgNum; ++I) {
1186 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
1187 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
1188 continue;
1189
1190 if (ArgMappers && ArgMappers[I]) {
1191 // Instead of executing the regular path of targetDataUpdate, call the
1192 // targetDataMapper variant which will call targetDataUpdate again
1193 // with new arguments.
1194 DP("Calling targetDataMapper for the %dth argument\n", I){};
1195
1196 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
1197 int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
1198 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
1199 targetDataUpdate);
1200
1201 if (Ret != OFFLOAD_SUCCESS) {
1202 REPORT("Call to targetDataUpdate via targetDataMapper for custom mapper"
1203 " failed.\n");
1204 return OFFLOAD_FAIL;
1205 }
1206
1207 // Skip the rest of this function, continue to the next argument.
1208 continue;
1209 }
1210
1211 int Ret = OFFLOAD_SUCCESS;
1212
1213 if (ArgTypes[I] & OMP_TGT_MAPTYPE_NON_CONTIG) {
1214 __tgt_target_non_contig *NonContig = (__tgt_target_non_contig *)Args[I];
1215 int32_t DimSize = ArgSizes[I];
1216 uint64_t Size =
1217 NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
1218 int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
1219 Ret = targetDataNonContiguous(
1220 Loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
1221 /*current_dim=*/0, DimSize - MergedDim, /*offset=*/0, AsyncInfo);
1222 } else {
1223 Ret = targetDataContiguous(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
1224 ArgTypes[I], AsyncInfo);
1225 }
1226 if (Ret == OFFLOAD_FAIL)
1227 return OFFLOAD_FAIL;
1228 }
1229 return OFFLOAD_SUCCESS;
1230}
1231
1232static const unsigned LambdaMapping = OMP_TGT_MAPTYPE_PTR_AND_OBJ |
1233 OMP_TGT_MAPTYPE_LITERAL |
1234 OMP_TGT_MAPTYPE_IMPLICIT;
1235static bool isLambdaMapping(int64_t Mapping) {
1236 return (Mapping & LambdaMapping) == LambdaMapping;
1237}
1238
1239namespace {
1240/// Find the table information in the map or look it up in the translation
1241/// tables.
1242TableMap *getTableMap(void *HostPtr) {
1243 std::lock_guard<std::mutex> TblMapLock(PM->TblMapMtx);
1244 HostPtrToTableMapTy::iterator TableMapIt =
1245 PM->HostPtrToTableMap.find(HostPtr);
1246
1247 if (TableMapIt != PM->HostPtrToTableMap.end())
1248 return &TableMapIt->second;
1249
1250 // We don't have a map. So search all the registered libraries.
1251 TableMap *TM = nullptr;
1252 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1253 for (HostEntriesBeginToTransTableTy::iterator Itr =
1254 PM->HostEntriesBeginToTransTable.begin();
1255 Itr != PM->HostEntriesBeginToTransTable.end(); ++Itr) {
1256 // get the translation table (which contains all the good info).
1257 TranslationTable *TransTable = &Itr->second;
1258 // iterate over all the host table entries to see if we can locate the
1259 // host_ptr.
1260 __tgt_offload_entry *Cur = TransTable->HostTable.EntriesBegin;
1261 for (uint32_t I = 0; Cur < TransTable->HostTable.EntriesEnd; ++Cur, ++I) {
1262 if (Cur->addr != HostPtr)
1263 continue;
1264 // we got a match, now fill the HostPtrToTableMap so that we
1265 // may avoid this search next time.
1266 TM = &(PM->HostPtrToTableMap)[HostPtr];
1267 TM->Table = TransTable;
1268 TM->Index = I;
1269 return TM;
1270 }
1271 }
1272
1273 return nullptr;
1274}
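
getTableMap above is a memoized lookup: it first consults PM->HostPtrToTableMap and only on a miss scans the registered translation tables, caching the hit so the next query for the same host pointer is O(1). A rough sketch of that cache-then-scan pattern, using simplified stand-in types rather than the runtime's structures:

#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

// Illustrative stand-ins; the real runtime keys on host entry addresses and
// caches a pointer into the translation table rather than a string.
struct Entry {
  const void *Addr;
  std::string Name;
};

static std::vector<Entry> RegisteredEntries;            // "registered libraries"
static std::unordered_map<const void *, const Entry *> Cache;
static std::mutex CacheMtx;

const Entry *lookup(const void *HostPtr) {
  std::lock_guard<std::mutex> Lock(CacheMtx);
  auto It = Cache.find(HostPtr);
  if (It != Cache.end())
    return It->second; // fast path: previously resolved

  // Slow path: linear scan, then memoize so the next lookup is a cache hit.
  for (const Entry &E : RegisteredEntries)
    if (E.Addr == HostPtr)
      return Cache[HostPtr] = &E;
  return nullptr;
}

int main() {
  int Kernel = 0;
  RegisteredEntries.push_back({&Kernel, "my_offload_entry"});
  if (const Entry *E = lookup(&Kernel))
    std::cout << "resolved to: " << E->Name << "\n";
  return 0;
}
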
1275
1276 /// A class that manages private arguments in a target region.
1277class PrivateArgumentManagerTy {
1278 /// A data structure for the information of first-private arguments. We can
1279 /// use this information to optimize data transfer by packing all
1280 /// first-private arguments and transfer them all at once.
1281 struct FirstPrivateArgInfoTy {
1282 /// The index of the element in \p TgtArgs corresponding to the argument
1283 int Index;
1284 /// Host pointer begin
1285 char *HstPtrBegin;
1286 /// Host pointer end
1287 char *HstPtrEnd;
1288 /// Aligned size
1289 int64_t AlignedSize;
1290 /// Host pointer name
1291 map_var_info_t HstPtrName = nullptr;
1292
1293 FirstPrivateArgInfoTy(int Index, void *HstPtr, int64_t Size,
1294 const map_var_info_t HstPtrName = nullptr)
1295 : Index(Index), HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
1296 HstPtrEnd(HstPtrBegin + Size), AlignedSize(Size + Size % Alignment),
1297 HstPtrName(HstPtrName) {}
1298 };
1299
1300 /// A vector of target pointers for all private arguments
1301 SmallVector<void *> TgtPtrs;
1302
1303 /// A vector of information of all first-private arguments to be packed
1304 SmallVector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
1305 /// Host buffer for all arguments to be packed
1306 SmallVector<char> FirstPrivateArgBuffer;
1307 /// The total size of all arguments to be packed
1308 int64_t FirstPrivateArgSize = 0;
1309
1310 /// A reference to the \p DeviceTy object
1311 DeviceTy &Device;
1312 /// A pointer to a \p AsyncInfoTy object
1313 AsyncInfoTy &AsyncInfo;
1314
1315 // TODO: What would be the best value here? Should we make it configurable?
1316 // If the size is larger than this threshold, we will allocate and transfer it
1317 // immediately instead of packing it.
1318 static constexpr const int64_t FirstPrivateArgSizeThreshold = 1024;
1319
1320public:
1321 /// Constructor
1322 PrivateArgumentManagerTy(DeviceTy &Dev, AsyncInfoTy &AsyncInfo)
1323 : Device(Dev), AsyncInfo(AsyncInfo) {}
1324
1325 /// Add a private argument
1326 int addArg(void *HstPtr, int64_t ArgSize, int64_t ArgOffset,
1327 bool IsFirstPrivate, void *&TgtPtr, int TgtArgsIndex,
1328 const map_var_info_t HstPtrName = nullptr,
1329 const bool AllocImmediately = false) {
1330 // If the argument is not first-private, or its size is greater than a
1331 // predefined threshold, we will allocate memory and issue the transfer
1332 // immediately.
1333 if (ArgSize > FirstPrivateArgSizeThreshold || !IsFirstPrivate ||
1334 AllocImmediately) {
1335 TgtPtr = Device.allocData(ArgSize, HstPtr);
1336 if (!TgtPtr) {
1337 DP("Data allocation for %sprivate array " DPxMOD " failed.\n",{}
1338 (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtr)){};
1339 return OFFLOAD_FAIL(~0);
1340 }
1341#ifdef OMPTARGET_DEBUG
1342 void *TgtPtrBase = (void *)((intptr_t)TgtPtr + ArgOffset);
1343 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD{}
1344 " for %sprivate array " DPxMOD " - pushing target argument " DPxMOD{}
1345 "\n",{}
1346 ArgSize, DPxPTR(TgtPtr), (IsFirstPrivate ? "first-" : ""),{}
1347 DPxPTR(HstPtr), DPxPTR(TgtPtrBase)){};
1348#endif
1349 // If first-private, copy data from host
1350 if (IsFirstPrivate) {
1351 DP("Submitting firstprivate data to the device.\n"){};
1352 int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo);
1353 if (Ret != OFFLOAD_SUCCESS) {
1354 DP("Copying data to device failed.\n");
1355 return OFFLOAD_FAIL;
1356 }
1357 }
1358 TgtPtrs.push_back(TgtPtr);
1359 } else {
1360 DP("Firstprivate array " DPxMOD " of size %" PRId64 " will be packed\n",{}
1361 DPxPTR(HstPtr), ArgSize){};
1362 // When we reach this point, the argument must meet all of the following
1363 // requirements:
1364 // 1. Its size does not exceed the threshold (see the comment for
1365 // FirstPrivateArgSizeThreshold);
1366 // 2. It must be first-private (needs to be mapped to the target device).
1367 // We pack all such arguments and transfer them at once to reduce the
1368 // number of data transfers. We do not take non-first-private arguments,
1369 // i.e. private arguments that do not need to be mapped to the target
1370 // device, into account because data allocation can be very efficient
1371 // with the memory manager.
1372
1373 // Placeholder value
1374 TgtPtr = nullptr;
1375 FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
1376 HstPtrName);
1377 FirstPrivateArgSize += FirstPrivateArgInfo.back().AlignedSize;
1378 }
1379
1380 return OFFLOAD_SUCCESS;
1381 }
1382
1383 /// Pack first-private arguments, replace place holder pointers in \p TgtArgs,
1384 /// and start the transfer.
1385 int packAndTransfer(SmallVector<void *> &TgtArgs) {
1386 if (!FirstPrivateArgInfo.empty()) {
1387 assert(FirstPrivateArgSize != 0 &&
1388 "FirstPrivateArgSize is 0 but FirstPrivateArgInfo is empty");
1389 FirstPrivateArgBuffer.resize(FirstPrivateArgSize, 0);
1390 auto Itr = FirstPrivateArgBuffer.begin();
1391 // Copy all host data to this buffer
1392 for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
1393 std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
1394 Itr = std::next(Itr, Info.AlignedSize);
1395 }
1396 // Allocate target memory
1397 void *TgtPtr =
1398 Device.allocData(FirstPrivateArgSize, FirstPrivateArgBuffer.data());
1399 if (TgtPtr == nullptr) {
1400 DP("Failed to allocate target memory for private arguments.\n"){};
1401 return OFFLOAD_FAIL(~0);
1402 }
1403 TgtPtrs.push_back(TgtPtr);
1404 DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD "\n",{}
1405 FirstPrivateArgSize, DPxPTR(TgtPtr)){};
1406 // Transfer data to target device
1407 int Ret = Device.submitData(TgtPtr, FirstPrivateArgBuffer.data(),
1408 FirstPrivateArgSize, AsyncInfo);
1409 if (Ret != OFFLOAD_SUCCESS) {
1410 DP("Failed to submit data of private arguments.\n");
1411 return OFFLOAD_FAIL;
1412 }
1413 // Fill in all placeholder pointers
1414 auto TP = reinterpret_cast<uintptr_t>(TgtPtr);
1415 for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
1416 void *&Ptr = TgtArgs[Info.Index];
1417 assert(Ptr == nullptr && "Target pointer is already set by mistake");
1418 Ptr = reinterpret_cast<void *>(TP);
1419 TP += Info.AlignedSize;
1420 DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD{}
1421 "\n",{}
1422 DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,{}
1423 DPxPTR(Ptr)){};
1424 }
1425 }
1426
1427 return OFFLOAD_SUCCESS(0);
1428 }
1429
1430 /// Free all target memory allocated for private arguments
1431 int free() {
1432 for (void *P : TgtPtrs) {
1433 int Ret = Device.deleteData(P);
1434 if (Ret != OFFLOAD_SUCCESS) {
1435 DP("Deallocation of (first-)private arrays failed.\n");
1436 return OFFLOAD_FAIL;
1437 }
1438 }
1439
1440 TgtPtrs.clear();
1441
1442 return OFFLOAD_SUCCESS;
1443 }
1444};
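
To summarize the class above: small first-private arguments are deferred, concatenated into one host staging buffer using the AlignedSize computed in FirstPrivateArgInfoTy, transferred with a single submitData call, and each placeholder target pointer is then patched to the buffer's device base plus the argument's running offset. The following host-only sketch reproduces just the packing arithmetic; the structures and the Alignment value are illustrative, not the runtime's types.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

struct PackedArg {
  const void *HstPtr;
  int64_t Size;
  int64_t AlignedSize; // Size rounded the same way as the constructor above
  int64_t Offset;      // offset of this argument inside the staging buffer
};

int main() {
  const int64_t Alignment = 8; // illustrative word-size alignment
  int A = 7;
  double B = 3.14;
  char C[3] = {'x', 'y', 'z'};

  std::vector<PackedArg> Args = {
      {&A, sizeof(A), 0, 0}, {&B, sizeof(B), 0, 0}, {C, sizeof(C), 0, 0}};

  // First pass: compute aligned sizes and running offsets.
  int64_t Total = 0;
  for (PackedArg &P : Args) {
    P.AlignedSize = P.Size + P.Size % Alignment;
    P.Offset = Total;
    Total += P.AlignedSize;
  }

  // Second pass: copy every payload into one staging buffer. In the runtime
  // this buffer is transferred with a single submitData call, and each
  // placeholder target pointer becomes the device base plus P.Offset.
  std::vector<char> Staging(Total, 0);
  for (const PackedArg &P : Args)
    std::memcpy(Staging.data() + P.Offset, P.HstPtr, P.Size);

  for (const PackedArg &P : Args)
    std::cout << "arg of size " << P.Size << " packed at offset " << P.Offset
              << "\n";
  return 0;
}
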
1445
1446/// Process data before launching the kernel, including calling targetDataBegin
1447/// to map and transfer data to target device, transferring (first-)private
1448/// variables.
1449static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
1450 int32_t ArgNum, void **ArgBases, void **Args,
1451 int64_t *ArgSizes, int64_t *ArgTypes,
1452 map_var_info_t *ArgNames, void **ArgMappers,
1453 SmallVector<void *> &TgtArgs,
1454 SmallVector<ptrdiff_t> &TgtOffsets,
1455 PrivateArgumentManagerTy &PrivateArgumentManager,
1456 AsyncInfoTy &AsyncInfo) {
1457 TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc);
1458 DeviceTy &Device = *PM->Devices[DeviceId];
1459 int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
1460 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
1461 if (Ret != OFFLOAD_SUCCESS) {
1462 REPORT("Call to targetDataBegin failed, abort target.\n");
1463 return OFFLOAD_FAIL;
1464 }
1465
1466 // List of (first-)private arrays allocated for this target region
1467 SmallVector<int> TgtArgsPositions(ArgNum, -1);
1468
1469 for (int32_t I = 0; I < ArgNum; ++I) {
1470 if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
1471 // This is not a target parameter, do not push it into TgtArgs.
1472 // Check for lambda mapping.
1473 if (isLambdaMapping(ArgTypes[I])) {
1474 assert((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
1475 "PTR_AND_OBJ must be also MEMBER_OF.");
1476 unsigned Idx = getParentIndex(ArgTypes[I]);
1477 int TgtIdx = TgtArgsPositions[Idx];
1478 assert(TgtIdx != -1 && "Base address must be translated already.");
1479 // The parent lambda must be processed already and it must be the last
1480 // in TgtArgs and TgtOffsets arrays.
1481 void *HstPtrVal = Args[I];
1482 void *HstPtrBegin = ArgBases[I];
1483 void *HstPtrBase = Args[Idx];
1484 bool IsLast, IsHostPtr; // IsLast is unused.
1485 void *TgtPtrBase =
1486 (void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);
1487 DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)){};
1488 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
1489 void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
1490 void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
1491 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
1492 HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,
1493 /*UseHoldRefCount=*/false, IsHostPtr);
1494 PointerTgtPtrBegin = TPR.TargetPointer;
1495 if (!TPR.isPresent()) {
1496 DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",{}
1497 DPxPTR(HstPtrVal)){};
1498 continue;
1499 }
1500 if (IsHostPtr) {
1501 DP("Unified memory is active, no need to map lambda captured"{}
1502 "variable (" DPxMOD ")\n",{}
1503 DPxPTR(HstPtrVal)){};
1504 continue;
1505 }
1506 DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",{}
1507 DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)){};
1508 Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin,
1509 sizeof(void *), AsyncInfo);
1510 if (Ret != OFFLOAD_SUCCESS) {
1511 REPORT("Copying data to device failed.\n");
1512 return OFFLOAD_FAIL;
1513 }
1514 }
1515 continue;
1516 }
1517 void *HstPtrBegin = Args[I];
1518 void *HstPtrBase = ArgBases[I];
1519 void *TgtPtrBegin;
1520 map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
1521 ptrdiff_t TgtBaseOffset;
1522 bool IsLast, IsHostPtr; // unused.
1523 TargetPointerResultTy TPR;
1524 if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
1525 DP("Forwarding first-private value " DPxMOD " to the target construct\n",{}
1526 DPxPTR(HstPtrBase)){};
1527 TgtPtrBegin = HstPtrBase;
1528 TgtBaseOffset = 0;
1529 } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
1530 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
1531 const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
1532 // If there is a next argument and it depends on the current one, we need
1533 // to allocate the private memory immediately. If this is not the case,
1534 // then the argument can be marked for optimization and packed with the
1535 // other privates.
1536 const bool AllocImmediately =
1537 (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
1538 Ret = PrivateArgumentManager.addArg(
1539 HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
1540 TgtArgs.size(), HstPtrName, AllocImmediately);
1541 if (Ret != OFFLOAD_SUCCESS) {
1542 REPORT("Failed to process %sprivate argument " DPxMOD "\n",
1543 (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
1544 return OFFLOAD_FAIL;
1545 }
1546 } else {
1547 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
1548 HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
1549 TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
1550 /*UpdateRefCount=*/false,
1551 /*UseHoldRefCount=*/false, IsHostPtr);
1552 TgtPtrBegin = TPR.TargetPointer;
1553 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
1554#ifdef OMPTARGET_DEBUG
1555 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
1556 DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",{}
1557 DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin)){};
1558#endif
1559 }
1560 TgtArgsPositions[I] = TgtArgs.size();
1561 TgtArgs.push_back(TgtPtrBegin);
1562 TgtOffsets.push_back(TgtBaseOffset);
1563 }
1564
1565 assert(TgtArgs.size() == TgtOffsets.size() &&
1566 "Size mismatch in arguments and offsets");
1567
1568 // Pack and transfer first-private arguments
1569 Ret = PrivateArgumentManager.packAndTransfer(TgtArgs);
1570 if (Ret != OFFLOAD_SUCCESS) {
1571 DP("Failed to pack and transfer first private arguments\n");
1572 return OFFLOAD_FAIL;
1573 }
1574
1575 return OFFLOAD_SUCCESS(0);
1576}
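
For the lambda (PTR_AND_OBJ) case handled above, the device address of a captured field is recovered as the parent object's translated base plus the host-side delta between the field and the object, and the device pointer of the pointee is then written into that location. A tiny arithmetic sketch of that base-plus-delta translation, with made-up addresses used purely for illustration:

#include <cstdint>
#include <iostream>

int main() {
  // Host-side layout of a "lambda object": a base address plus a captured
  // pointer stored at some offset inside it (values are invented).
  uintptr_t HstPtrBase = 0x1000;  // host address of the lambda object
  uintptr_t HstPtrBegin = 0x1010; // host address of the captured field
  uintptr_t TgtPtrBase = 0x9000;  // device address of the lambda object

  // Same arithmetic as in processDataBefore: the captured field lives at the
  // same delta inside the device copy of the object.
  uintptr_t Delta = HstPtrBegin - HstPtrBase;
  uintptr_t TgtPtrBegin = TgtPtrBase + Delta;

  std::cout << std::hex << "device address of captured field: 0x"
            << TgtPtrBegin << "\n"; // prints 0x9010
  return 0;
}
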
1577
1578/// Process data after launching the kernel, including transferring data back to
1579/// host if needed and deallocating target memory of (first-)private variables.
1580static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
1581 int32_t ArgNum, void **ArgBases, void **Args,
1582 int64_t *ArgSizes, int64_t *ArgTypes,
1583 map_var_info_t *ArgNames, void **ArgMappers,
1584 PrivateArgumentManagerTy &PrivateArgumentManager,
1585 AsyncInfoTy &AsyncInfo) {
1586 TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc);
1587 DeviceTy &Device = *PM->Devices[DeviceId];
1588
1589 // Move data from device.
1590 int Ret = targetDataEnd(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
1591 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
1592 if (Ret != OFFLOAD_SUCCESS) {
1593 REPORT("Call to targetDataEnd failed, abort target.\n");
1594 return OFFLOAD_FAIL;
1595 }
1596
1597 // Free target memory for private arguments after synchronization.
1598 // TODO: We might want to remove `mutable` in the future by not changing the
1599 // captured variables somehow.
1600 AsyncInfo.addPostProcessingFunction(
1601 [PrivateArgumentManager =
1602 std::move(PrivateArgumentManager)]() mutable -> int {
1603 int Ret = PrivateArgumentManager.free();
1604 if (Ret != OFFLOAD_SUCCESS) {
1605 REPORT("Failed to deallocate target memory for private args\n");
1606 return OFFLOAD_FAIL;
1607 }
1608 return Ret;
1609 });
1610
1611 return OFFLOAD_SUCCESS;
1612}
1613} // namespace
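
processDataAfter moves the PrivateArgumentManagerTy into a mutable post-processing lambda so the private target buffers are released only after synchronization, with their ownership travelling inside the callback. A minimal sketch of that move-capture pattern; BufferOwner and the callback vector are illustrative stand-ins, not the runtime's types.

#include <functional>
#include <iostream>
#include <utility>
#include <vector>

// Illustrative resource owner standing in for a private-argument manager.
struct BufferOwner {
  std::vector<char> Buffer = std::vector<char>(1024);
  int free() {
    Buffer.clear();
    std::cout << "buffers released\n";
    return 0;
  }
};

int main() {
  std::vector<std::function<int()>> PostProcessing;

  BufferOwner Owner;
  // Move the owner into the callback; 'mutable' is needed because free()
  // modifies the captured copy. The buffers now live exactly as long as the
  // deferred callback does.
  PostProcessing.push_back(
      [Owner = std::move(Owner)]() mutable -> int { return Owner.free(); });

  // ... kernel runs, transfers complete, then the deferred cleanup fires ...
  for (auto &F : PostProcessing)
    F();
  return 0;
}
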
1614
1615 /// Performs the same actions as data_begin in case arg_num is
1616 /// non-zero and initiates a run of the offloaded region on the target platform;
1617 /// if arg_num is non-zero, after the region execution is done it also
1618 /// performs the same actions as data_update and data_end above. This function
1619 /// returns 0 if it was able to transfer the execution to a target and an
1620 /// integer different from zero otherwise.
1621int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
1622 KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) {
1623 int32_t DeviceId = Device.DeviceID;
1624 TableMap *TM = getTableMap(HostPtr);
1625 // No map for this host pointer found!
1626 if (!TM) {
1627 REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host ptr " "0x%0*" "l" "x" " does not have a matching target pointer.\n"
, ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HostPtr))); } while
(0);
1628 DPxPTR(HostPtr))do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host ptr " "0x%0*" "l" "x" " does not have a matching target pointer.\n"
, ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HostPtr))); } while
(0);
;
1629 return OFFLOAD_FAIL(~0);
1630 }
1631
1632 // get target table.
1633 __tgt_target_table *TargetTable = nullptr;
1634 {
1635 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1636 assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
1637 "Not expecting a device ID outside the table's bounds!");
1638 TargetTable = TM->Table->TargetsTable[DeviceId];
1639 }
1640 assert(TargetTable && "Global data has not been mapped\n");
1641
1642 DP("loop trip count is %" PRIu64 ".\n", KernelArgs.Tripcount){};
1643
1644 // We need to keep bases and offsets separate. Sometimes (e.g. in OpenCL) we
1645 // need to manifest base pointers prior to launching a kernel. Even if we have
1646 // mapped an object only partially, e.g. A[N:M], although the kernel is
1647 // expected to access elements starting at address &A[N] and beyond, we still
1648 // need to manifest the base of the array &A[0]. In other cases, e.g. the COI
1649 // API, we need the begin address itself, i.e. &A[N], as the API operates on
1650 // begin addresses, not bases. That's why we pass args and offsets as two
1651 // separate entities so that each plugin can do what it needs. This behavior
1652 // was introduced via https://reviews.llvm.org/D33028 and commit 1546d319244c.
1653 SmallVector<void *> TgtArgs;
1654 SmallVector<ptrdiff_t> TgtOffsets;
1655
1656 PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
1657
1658 int NumClangLaunchArgs = KernelArgs.NumArgs;
1659 int Ret = OFFLOAD_SUCCESS;
1660 if (NumClangLaunchArgs) {
1661 // Process data, such as data mapping, before launching the kernel
1662 Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
1663 KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
1664 KernelArgs.ArgSizes, KernelArgs.ArgTypes,
1665 KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
1666 TgtOffsets, PrivateArgumentManager, AsyncInfo);
1667 if (Ret != OFFLOAD_SUCCESS) {
1668 REPORT("Failed to process data before launching the kernel.\n");
1669 return OFFLOAD_FAIL;
1670 }
1671
1672 // Clang might pass more values via the ArgPtrs to the runtime than we pass
1673 // on to the kernel.
1674 // TODO: Next time we adjust the KernelArgsTy we should introduce a new
1675 // NumKernelArgs field.
1676 KernelArgs.NumArgs = TgtArgs.size();
1677 }
1678
1679 // Launch device execution.
1680 void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
1681 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",{}
1682 TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index){};
1683
1684 {
1685 assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
1686 TIMESCOPE_WITH_NAME_AND_IDENT("Initiate Kernel Launch", Loc);
1687 Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
1688 KernelArgs, AsyncInfo);
1689 }
1690
1691 if (Ret != OFFLOAD_SUCCESS) {
1692 REPORT("Executing target region abort target.\n");
1693 return OFFLOAD_FAIL;
1694 }
1695
1696 if (NumClangLaunchArgs) {
1697 // Transfer data back and deallocate target memory for (first-)private
1698 // variables
1699 Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
1700 KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
1701 KernelArgs.ArgSizes, KernelArgs.ArgTypes,
1702 KernelArgs.ArgNames, KernelArgs.ArgMappers,
1703 PrivateArgumentManager, AsyncInfo);
1704 if (Ret != OFFLOAD_SUCCESS) {
1705 REPORT("Failed to process data after launching the kernel.\n");
1706 return OFFLOAD_FAIL;
1707 }
1708 }
1709
1710 return OFFLOAD_SUCCESS;
1711}
1712
1713/// Executes a kernel using pre-recorded information for loading to
1714/// device memory to launch the target kernel with the pre-recorded
1715/// configuration.
1716int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
1717 void *DeviceMemory, int64_t DeviceMemorySize, void **TgtArgs,
1718 ptrdiff_t *TgtOffsets, int32_t NumArgs, int32_t NumTeams,
1719 int32_t ThreadLimit, uint64_t LoopTripCount,
1720 AsyncInfoTy &AsyncInfo) {
1721 int32_t DeviceId = Device.DeviceID;
1722 TableMap *TM = getTableMap(HostPtr);
1723 // Fail if the table map fails to find the target kernel pointer for the
1724 // provided host pointer.
1725 if (!TM) {
1726 REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host ptr " "0x%0*" "l" "x" " does not have a matching target pointer.\n"
, ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HostPtr))); } while
(0);
1727 DPxPTR(HostPtr))do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Host ptr " "0x%0*" "l" "x" " does not have a matching target pointer.\n"
, ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HostPtr))); } while
(0);
;
1728 return OFFLOAD_FAIL(~0);
1729 }
1730
1731 // Retrieve the target table of offloading entries.
1732 __tgt_target_table *TargetTable = nullptr;
1733 {
1734 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1735 assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
1736 "Not expecting a device ID outside the table's bounds!");
1737 TargetTable = TM->Table->TargetsTable[DeviceId];
1738 }
1739 assert(TargetTable && "Global data has not been mapped\n");
1740
1741 // Retrieve the target kernel pointer, allocate and store the recorded device
1742 // memory data, and launch device execution.
1743 void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
1744 DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",{}
1745 TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index){};
1746
1747 void *TgtPtr = Device.allocData(DeviceMemorySize, /* HstPtr */ nullptr,
1748 TARGET_ALLOC_DEFAULT);
1749 Device.submitData(TgtPtr, DeviceMemory, DeviceMemorySize, AsyncInfo);
1750
1751 KernelArgsTy KernelArgs = {0};
1752 KernelArgs.Version = 2;
1753 KernelArgs.NumArgs = NumArgs;
1754 KernelArgs.Tripcount = LoopTripCount;
1755 KernelArgs.NumTeams[0] = NumTeams;
1756 KernelArgs.ThreadLimit[0] = ThreadLimit;
1757
1758 int Ret = Device.launchKernel(TgtEntryPtr, TgtArgs, TgtOffsets, KernelArgs,
1759 AsyncInfo);
1760
1761 if (Ret != OFFLOAD_SUCCESS) {
1762 REPORT("Executing target region abort target.\n");
1763 return OFFLOAD_FAIL;
1764 }
1765
1766 return OFFLOAD_SUCCESS;
1767}