Bug Summary

File: build/source/openmp/libomptarget/src/omptarget.cpp
Warning: line 527, column 7
Value stored to 'IsHostPtr' is never read
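
For orientation, here is a minimal standalone C++ sketch of the pattern the deadcode checker flags; it is not code from omptarget.cpp, and the function and variable names are made up for illustration. A value is stored to a variable and then unconditionally overwritten before any read, so the first store has no effect:

bool selectIsHostPtr(bool FromPointerEntry, bool FromObjectEntry) {
  bool IsHostPtr;
  // This store is reported: every path reaches the next assignment before
  // any read of IsHostPtr, so the value written here is never observed.
  IsHostPtr = FromPointerEntry;
  IsHostPtr = FromObjectEntry;
  return IsHostPtr; // only the last store is read
}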

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name omptarget.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -I projects/openmp/libomptarget/src -I /build/source/openmp/libomptarget/src -I include -I /build/source/llvm/include -I projects/openmp/runtime/src -I /build/source/openmp/libomptarget/include -D OMPT_SUPPORT=1 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1670066131 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-enum-constexpr-conversion -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-12-03-132955-15984-1 -x c++ /build/source/openmp/libomptarget/src/omptarget.cpp
1//===------ omptarget.cpp - Target independent OpenMP target RTL -- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the interface to be used by Clang during the codegen of a
10// target region.
11//
12//===----------------------------------------------------------------------===//
13
14#include "omptarget.h"
15#include "device.h"
16#include "private.h"
17#include "rtl.h"
18
19#include <cassert>
20#include <cstdint>
21#include <vector>
22
23using llvm::SmallVector;
24
25int AsyncInfoTy::synchronize() {
26 int Result = OFFLOAD_SUCCESS;
27 if (AsyncInfo.Queue) {
28 // If we have a queue we need to synchronize it now.
29 Result = Device.synchronize(*this);
30 assert(AsyncInfo.Queue == nullptr &&
31 "The device plugin should have nulled the queue to indicate there "
32 "are no outstanding actions!");
33 }
34 return Result;
35}
36
37void *&AsyncInfoTy::getVoidPtrLocation() {
38 BufferLocations.push_back(nullptr);
39 return BufferLocations.back();
40}
41
42/* All begin addresses for partially mapped structs must be 8-aligned in order
43 * to ensure proper alignment of members. E.g.
44 *
45 * struct S {
46 * int a; // 4-aligned
47 * int b; // 4-aligned
48 * int *p; // 8-aligned
49 * } s1;
50 * ...
51 * #pragma omp target map(tofrom: s1.b, s1.p[0:N])
52 * {
53 * s1.b = 5;
54 * for (int i...) s1.p[i] = ...;
55 * }
56 *
57 * Here we are mapping s1 starting from member b, so BaseAddress=&s1=&s1.a and
58 * BeginAddress=&s1.b. Let's assume that the struct begins at address 0x100,
59 * then &s1.a=0x100, &s1.b=0x104, &s1.p=0x108. Each member obeys the alignment
60 * requirements for its type. Now, when we allocate memory on the device, in
61 * CUDA's case cuMemAlloc() returns an address which is at least 256-aligned.
62 * This means that the chunk of the struct on the device will start at a
63 * 256-aligned address, let's say 0x200. Then the address of b will be 0x200 and
64 * address of p will be a misaligned 0x204 (on the host there was no need to add
65 * padding between b and p, so p comes exactly 4 bytes after b). If the device
66 * kernel tries to access s1.p, a misaligned address error occurs (as reported
67 * by the CUDA plugin). By padding the begin address down to a multiple of 8 and
68 * extending the size of the allocated chunk accordingly, the chunk on the
69 * device will start at 0x200 with the padding (4 bytes), then &s1.b=0x204 and
70 * &s1.p=0x208, as they should be to satisfy the alignment requirements.
71 */
72static const int64_t Alignment = 8;
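
As a hedged illustration of the arithmetic described in the comment above, the sketch below mirrors the padding computation that targetDataBegin applies later (around source lines 476-487); alignBegin is a hypothetical helper name, not part of libomptarget:

#include <cstdint>

// Round a combined-entry begin address down to the 8-byte Alignment and grow
// the mapped size by the same amount, e.g. begin 0x104 becomes 0x100 with the
// 4 padding bytes added to the size.
static void *alignBegin(void *HstPtrBegin, int64_t &DataSize) {
  const int64_t Alignment = 8;
  int64_t Padding = (int64_t)HstPtrBegin % Alignment;
  if (Padding) {
    HstPtrBegin = (char *)HstPtrBegin - Padding;
    DataSize += Padding;
  }
  return HstPtrBegin;
}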
73
74/// Map global data and execute pending ctors
75static int initLibrary(DeviceTy &Device) {
76 /*
77 * Map global data
78 */
79 int32_t DeviceId = Device.DeviceID;
80 int Rc = OFFLOAD_SUCCESS;
81 bool SupportsEmptyImages = Device.RTL->supports_empty_images &&
82 Device.RTL->supports_empty_images() > 0;
83 {
84 std::lock_guard<decltype(PM->TrlTblMtx)> LG(PM->TrlTblMtx);
85 for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) {
86 TranslationTable *TransTable =
87 &PM->HostEntriesBeginToTransTable[HostEntriesBegin];
88 if (TransTable->HostTable.EntriesBegin ==
89 TransTable->HostTable.EntriesEnd &&
90 !SupportsEmptyImages) {
91 // No host entry so no need to proceed
92 continue;
93 }
94
95 if (TransTable->TargetsTable[DeviceId] != 0) {
96 // Library entries have already been processed
97 continue;
98 }
99
100 // 1) get image.
101 assert(TransTable->TargetsImages.size() > (size_t)DeviceId &&
102 "Not expecting a device ID outside the table's bounds!");
103 __tgt_device_image *Img = TransTable->TargetsImages[DeviceId];
104 if (!Img) {
105 REPORT("No image loaded for device id %d.\n", DeviceId)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "No image loaded for device id %d.\n", DeviceId); } while (
0);
;
106 Rc = OFFLOAD_FAIL(~0);
107 break;
108 }
109 // 2) load image into the target table.
110 __tgt_target_table *TargetTable = TransTable->TargetsTable[DeviceId] =
111 Device.loadBinary(Img);
112 // Unable to get table for this image: invalidate image and fail.
113 if (!TargetTable) {
114 REPORT("Unable to generate entries table for device id %d.\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Unable to generate entries table for device id %d.\n", DeviceId
); } while (0);
115 DeviceId)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Unable to generate entries table for device id %d.\n", DeviceId
); } while (0);
;
116 TransTable->TargetsImages[DeviceId] = 0;
117 Rc = OFFLOAD_FAIL(~0);
118 break;
119 }
120
121 // Verify whether the two table sizes match.
122 size_t Hsize =
123 TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin;
124 size_t Tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin;
125
126 // Invalid image for these host entries!
127 if (Hsize != Tsize) {
128 REPORT(
129 "Host and Target tables mismatch for device id %d [%zx != %zx].\n",
130 DeviceId, Hsize, Tsize);
131 TransTable->TargetsImages[DeviceId] = 0;
132 TransTable->TargetsTable[DeviceId] = 0;
133 Rc = OFFLOAD_FAIL;
134 break;
135 }
136
137 DeviceTy::HDTTMapAccessorTy HDTTMap =
138 Device.HostDataToTargetMap.getExclusiveAccessor();
139
140 __tgt_target_table *HostTable = &TransTable->HostTable;
141 for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin,
142 *CurrHostEntry = HostTable->EntriesBegin,
143 *EntryDeviceEnd = TargetTable->EntriesEnd;
144 CurrDeviceEntry != EntryDeviceEnd;
145 CurrDeviceEntry++, CurrHostEntry++) {
146 if (CurrDeviceEntry->size != 0) {
147 // has data.
148 assert(CurrDeviceEntry->size == CurrHostEntry->size &&
149 "data size mismatch");
150
151 // Fortran may use multiple weak declarations for the same symbol,
152 // therefore we must allow for multiple weak symbols to be loaded from
153 // the fat binary. Treat these mappings as any other "regular"
154 // mapping. Add entry to map.
155 if (Device.getTgtPtrBegin(HDTTMap, CurrHostEntry->addr,
156 CurrHostEntry->size))
157 continue;
158
159 DP("Add mapping from host " DPxMOD " to device " DPxMOD{}
160 " with size %zu"{}
161 "\n",{}
162 DPxPTR(CurrHostEntry->addr), DPxPTR(CurrDeviceEntry->addr),{}
163 CurrDeviceEntry->size){};
164 HDTTMap->emplace(new HostDataToTargetTy(
165 (uintptr_t)CurrHostEntry->addr /*HstPtrBase*/,
166 (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
167 (uintptr_t)CurrHostEntry->addr +
168 CurrHostEntry->size /*HstPtrEnd*/,
169 (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
170 false /*UseHoldRefCount*/, CurrHostEntry->name,
171 true /*IsRefCountINF*/));
172 }
173 }
174 }
175 }
176
177 if (Rc != OFFLOAD_SUCCESS) {
178 return Rc;
179 }
180
181 /*
182 * Run ctors for static objects
183 */
184 if (!Device.PendingCtorsDtors.empty()) {
185 AsyncInfoTy AsyncInfo(Device);
186 // Call all ctors for all libraries registered so far
187 for (auto &Lib : Device.PendingCtorsDtors) {
188 if (!Lib.second.PendingCtors.empty()) {
189 DP("Has pending ctors... call now\n"){};
190 for (auto &Entry : Lib.second.PendingCtors) {
191 void *Ctor = Entry;
192 int Rc = target(nullptr, Device, Ctor, 0, nullptr, nullptr, nullptr,
193 nullptr, nullptr, nullptr, 1, 1, 0, true /*team*/,
194 AsyncInfo);
195 if (Rc != OFFLOAD_SUCCESS) {
196 REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(Ctor));
197 return OFFLOAD_FAIL;
198 }
199 }
200 // Clear the list to indicate that this device has been used
201 Lib.second.PendingCtors.clear();
202 DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(Lib.first)){};
203 }
204 }
205 // All constructors have been issued, wait for them now.
206 if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS)
207 return OFFLOAD_FAIL;
208 }
209 Device.HasPendingGlobals = false;
210
211 return OFFLOAD_SUCCESS;
212}
213
214void handleTargetOutcome(bool Success, ident_t *Loc) {
215 switch (PM->TargetOffloadPolicy) {
216 case tgt_disabled:
217 if (Success) {
218 FATAL_MESSAGE0(1, "expected no offloading while offloading is disabled");
219 }
220 break;
221 case tgt_default:
222 FATAL_MESSAGE0(1, "default offloading policy must be switched to "
223 "mandatory or disabled");
224 break;
225 case tgt_mandatory:
226 if (!Success) {
227 if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
228 for (auto &Device : PM->Devices)
229 dumpTargetPointerMappings(Loc, *Device);
230 else
231 FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Consult https://openmp.llvm.org/design/Runtimes.html " "for debugging options.\n"
); } while (0)
232 "for debugging options.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Consult https://openmp.llvm.org/design/Runtimes.html " "for debugging options.\n"
); } while (0)
;
233
234 SourceInfo Info(Loc);
235 if (Info.isAvailible())
236 fprintf(stderr, "%s:%d:%d: ", Info.getFilename(), Info.getLine(),
237 Info.getColumn());
238 else
239 FAILURE_MESSAGE("Source location information not present. Compile with "do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Source location information not present. Compile with " "-g or -gline-tables-only.\n"
); } while (0)
240 "-g or -gline-tables-only.\n")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Source location information not present. Compile with " "-g or -gline-tables-only.\n"
); } while (0)
;
241 FATAL_MESSAGE0(
242 1, "failure of target construct while offloading is mandatory");
243 } else {
244 if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
245 for (auto &Device : PM->Devices)
246 dumpTargetPointerMappings(Loc, *Device);
247 }
248 break;
249 }
250}
251
252static void handleDefaultTargetOffload() {
253 std::lock_guard<decltype(PM->TargetOffloadMtx)> LG(PM->TargetOffloadMtx);
254 if (PM->TargetOffloadPolicy == tgt_default) {
255 if (omp_get_num_devices() > 0) {
256 DP("Default TARGET OFFLOAD policy is now mandatory "{}
257 "(devices were found)\n"){};
258 PM->TargetOffloadPolicy = tgt_mandatory;
259 } else {
260 DP("Default TARGET OFFLOAD policy is now disabled "{}
261 "(no devices were found)\n"){};
262 PM->TargetOffloadPolicy = tgt_disabled;
263 }
264 }
265}
266
267static bool isOffloadDisabled() {
268 if (PM->TargetOffloadPolicy == tgt_default)
269 handleDefaultTargetOffload();
270 return PM->TargetOffloadPolicy == tgt_disabled;
271}
272
273// If offload is enabled, ensure that device DeviceID has been initialized,
274// global ctors have been executed, and global data has been mapped.
275//
276// The return bool indicates if the offload is to the host device
277// There are three possible results:
278 // - Return false if the target device is ready for offload
279// - Return true without reporting a runtime error if offload is
280// disabled, perhaps because the initial device was specified.
281// - Report a runtime error and return true.
282//
283// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device.
284// This step might be skipped if offload is disabled.
285bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc) {
286 if (isOffloadDisabled()) {
287 DP("Offload is disabled\n"){};
288 return true;
289 }
290
291 if (DeviceID == OFFLOAD_DEVICE_DEFAULT) {
292 DeviceID = omp_get_default_device();
293 DP("Use default device id %" PRId64 "\n", DeviceID){};
294 }
295
296 // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669.
297 if (omp_get_num_devices() == 0) {
298 DP("omp_get_num_devices() == 0 but offload is manadatory\n"){};
299 handleTargetOutcome(false, Loc);
300 return true;
301 }
302
303 if (DeviceID == omp_get_initial_device()) {
304 DP("Device is host (%" PRId64 "), returning as if offload is disabled\n",{}
305 DeviceID){};
306 return true;
307 }
308
309 // Is device ready?
310 if (!deviceIsReady(DeviceID)) {
311 REPORT("Device %" PRId64 " is not ready.\n", DeviceID)do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Device %" "l" "d" " is not ready.\n", DeviceID); } while (
0);
;
312 handleTargetOutcome(false, Loc);
313 return true;
314 }
315
316 // Get device info.
317 DeviceTy &Device = *PM->Devices[DeviceID];
318
319 // Check whether global data has been mapped for this device
320 {
321 std::lock_guard<decltype(Device.PendingGlobalsMtx)> LG(
322 Device.PendingGlobalsMtx);
323 if (Device.HasPendingGlobals && initLibrary(Device) != OFFLOAD_SUCCESS) {
324 REPORT("Failed to init globals on device %" PRId64 "\n", DeviceID);
325 handleTargetOutcome(false, Loc);
326 return true;
327 }
328 }
329
330 return false;
331}
332
333static int32_t getParentIndex(int64_t Type) {
334 return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
335}
336
337void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
338 const char *Name) {
339 TIMESCOPE();
340 DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size);
341
342 if (Size <= 0) {
343 DP("Call to %s with non-positive length\n", Name);
344 return NULL;
345 }
346
347 void *Rc = NULL;
348
349 if (DeviceNum == omp_get_initial_device()) {
350 Rc = malloc(Size);
351 DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc));
352 return Rc;
353 }
354
355 if (!deviceIsReady(DeviceNum)) {
356 DP("%s returns NULL ptr\n", Name);
357 return NULL;
358 }
359
360 DeviceTy &Device = *PM->Devices[DeviceNum];
361 Rc = Device.allocData(Size, nullptr, Kind);
362 DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc));
363 return Rc;
364}
365
366void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
367 const char *Name) {
368 TIMESCOPE();
369 DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum,
370 DPxPTR(DevicePtr));
371
372 if (!DevicePtr) {
373 DP("Call to %s with NULL ptr\n", Name);
374 return;
375 }
376
377 if (DeviceNum == omp_get_initial_device()) {
378 free(DevicePtr);
379 DP("%s deallocated host ptr\n", Name);
380 return;
381 }
382
383 if (!deviceIsReady(DeviceNum)) {
384 DP("%s returns, nothing to do\n", Name);
385 return;
386 }
387
388 PM->Devices[DeviceNum]->deleteData(DevicePtr, Kind);
389 DP("omp_target_free deallocated device ptr\n");
390}
391
392/// Call the user-defined mapper function followed by the appropriate
393// targetData* function (targetData{Begin,End,Update}).
394int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
395 int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
396 void *ArgMapper, AsyncInfoTy &AsyncInfo,
397 TargetDataFuncPtrTy TargetDataFunction) {
398 TIMESCOPE_WITH_IDENT(Loc);
399 DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper));
400
401 // The mapper function fills up Components.
402 MapperComponentsTy MapperComponents;
403 MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(ArgMapper);
404 (*MapperFuncPtr)((void *)&MapperComponents, ArgBase, Arg, ArgSize, ArgType,
405 ArgNames);
406
407 // Construct new arrays for args_base, args, arg_sizes and arg_types
408 // using the information in MapperComponents and call the corresponding
409 // targetData* function using these new arrays.
410 SmallVector<void *> MapperArgsBase(MapperComponents.Components.size());
411 SmallVector<void *> MapperArgs(MapperComponents.Components.size());
412 SmallVector<int64_t> MapperArgSizes(MapperComponents.Components.size());
413 SmallVector<int64_t> MapperArgTypes(MapperComponents.Components.size());
414 SmallVector<void *> MapperArgNames(MapperComponents.Components.size());
415
416 for (unsigned I = 0, E = MapperComponents.Components.size(); I < E; ++I) {
417 auto &C = MapperComponents.Components[I];
418 MapperArgsBase[I] = C.Base;
419 MapperArgs[I] = C.Begin;
420 MapperArgSizes[I] = C.Size;
421 MapperArgTypes[I] = C.Type;
422 MapperArgNames[I] = C.Name;
423 }
424
425 int Rc = TargetDataFunction(Loc, Device, MapperComponents.Components.size(),
426 MapperArgsBase.data(), MapperArgs.data(),
427 MapperArgSizes.data(), MapperArgTypes.data(),
428 MapperArgNames.data(), /*arg_mappers*/ nullptr,
429 AsyncInfo, /*FromMapper=*/true);
430
431 return Rc;
432}
433
434/// Internal function to do the mapping and transfer the data to the device
435int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
436 void **ArgsBase, void **Args, int64_t *ArgSizes,
437 int64_t *ArgTypes, map_var_info_t *ArgNames,
438 void **ArgMappers, AsyncInfoTy &AsyncInfo,
439 bool FromMapper) {
440 // process each input.
441 for (int32_t I = 0; I < ArgNum; ++I) {
442 // Ignore private variables and arrays - there is no mapping for them.
443 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
444 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
445 continue;
446
447 if (ArgMappers && ArgMappers[I]) {
448 // Instead of executing the regular path of targetDataBegin, call the
449 // targetDataMapper variant which will call targetDataBegin again
450 // with new arguments.
451 DP("Calling targetDataMapper for the %dth argument\n", I){};
452
453 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
454 int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
455 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
456 targetDataBegin);
457
458 if (Rc != OFFLOAD_SUCCESS) {
459 REPORT("Call to targetDataBegin via targetDataMapper for custom mapper"
460 " failed.\n");
461 return OFFLOAD_FAIL;
462 }
463
464 // Skip the rest of this function, continue to the next argument.
465 continue;
466 }
467
468 void *HstPtrBegin = Args[I];
469 void *HstPtrBase = ArgsBase[I];
470 int64_t DataSize = ArgSizes[I];
471 map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
472
473 // Adjust for proper alignment if this is a combined entry (for structs).
474 // Look at the next argument - if that is MEMBER_OF this one, then this one
475 // is a combined entry.
476 int64_t Padding = 0;
477 const int NextI = I + 1;
478 if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
479 getParentIndex(ArgTypes[NextI]) == I) {
480 Padding = (int64_t)HstPtrBegin % Alignment;
481 if (Padding) {
482 DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD{}
483 "\n",{}
484 Padding, DPxPTR(HstPtrBegin)){};
485 HstPtrBegin = (char *)HstPtrBegin - Padding;
486 DataSize += Padding;
487 }
488 }
489
490 // Address of pointer on the host and device, respectively.
491 void *PointerHstPtrBegin, *PointerTgtPtrBegin;
492 TargetPointerResultTy PointerTpr;
493 bool IsHostPtr = false;
494 bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
495 // Force the creation of a device side copy of the data when:
496 // a close map modifier was associated with a map that contained a to.
497 bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE;
498 bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
499 bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
500 // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
501 // have reached this point via __tgt_target_data_begin and not __tgt_target
502 // then no argument is marked as TARGET_PARAM ("omp target data map" is not
503 // associated with a target region, so there are no target parameters). This
504 // may be considered a hack, we could revise the scheme in the future.
505 bool UpdateRef =
506 !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0);
507 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
508 DP("Has a pointer entry: \n"){};
509 // Base is address of pointer.
510 //
511 // Usually, the pointer is already allocated by this time. For example:
512 //
513 // #pragma omp target map(s.p[0:N])
514 //
515 // The map entry for s comes first, and the PTR_AND_OBJ entry comes
516 // afterward, so the pointer is already allocated by the time the
517 // PTR_AND_OBJ entry is handled below, and PointerTgtPtrBegin is thus
518 // non-null. However, "declare target link" can produce a PTR_AND_OBJ
519 // entry for a global that might not already be allocated by the time the
520 // PTR_AND_OBJ entry is handled below, and so the allocation might fail
521 // when HasPresentModifier.
522 PointerTpr = Device.getTargetPointer(
523 HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr,
524 /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
525 HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo);
526 PointerTgtPtrBegin = PointerTpr.TargetPointer;
527 IsHostPtr = PointerTpr.Flags.IsHostPointer;
Value stored to 'IsHostPtr' is never read
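// Context for the warning above: this store to 'IsHostPtr' is unconditionally
// overwritten at line 554 (IsHostPtr = TPR.Flags.IsHostPointer;) before the
// variable's first read at line 574, so the value assigned here is never used.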
528 if (!PointerTgtPtrBegin) {
529 REPORT("Call to getTargetPointer returned null pointer (%s).\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
530 HasPresentModifier ? "'present' map type modifier"do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
531 : "device failure or illegal mapping")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
;
532 return OFFLOAD_FAIL(~0);
533 }
534 DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"{}
535 "\n",{}
536 sizeof(void *), DPxPTR(PointerTgtPtrBegin),{}
537 (PointerTpr.Flags.IsNewEntry ? "" : " not")){};
538 PointerHstPtrBegin = HstPtrBase;
539 // modify current entry.
540 HstPtrBase = *(void **)HstPtrBase;
541 // No need to update pointee ref count for the first element of the
542 // subelement that comes from mapper.
543 UpdateRef =
544 (!FromMapper || I != 0); // subsequently update ref count of pointee
545 }
546
547 const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO;
548 const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
549 auto TPR = Device.getTargetPointer(
550 HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, HasFlagAlways,
551 IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier,
552 HasHoldModifier, AsyncInfo);
553 void *TgtPtrBegin = TPR.TargetPointer;
554 IsHostPtr = TPR.Flags.IsHostPointer;
555 // If data_size==0, then the argument could be a zero-length pointer to
556 // NULL, so getOrAlloc() returning NULL is not an error.
557 if (!TgtPtrBegin && (DataSize || HasPresentModifier)) {
558 REPORT("Call to getTargetPointer returned null pointer (%s).\n",do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
559 HasPresentModifier ? "'present' map type modifier"do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
560 : "device failure or illegal mapping")do { fprintf(stderr, "Libomptarget" " error: "); fprintf(stderr
, "Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier
? "'present' map type modifier" : "device failure or illegal mapping"
); } while (0);
;
561 return OFFLOAD_FAIL(~0);
562 }
563 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD{}
564 " - is%s new\n",{}
565 DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")){};
566
567 if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
568 uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
569 void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
570 DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase)){};
571 ArgsBase[I] = TgtPtrBase;
572 }
573
574 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
575 // Check whether we need to update the pointer on the device
576 bool UpdateDevPtr = false;
577
578 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
579 void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
580
581 Device.ShadowMtx.lock();
582 auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin);
583 // If this pointer is not in the map we need to insert it. If the map
584 // contains a stale entry, we need to update it (e.g. if the pointee was
585 // deallocated and later on is reallocated at another device address). The
586 // latter scenario is the subject of LIT test env/base_ptr_ref_count.c. An
587 // entry is removed from ShadowPtrMap only when the PTR of a PTR_AND_OBJ
588 // pair is deallocated, not when the OBJ is deallocated. In
589 // env/base_ptr_ref_count.c the PTR is a global "declare target" pointer,
590 // so it stays in the map for the lifetime of the application. When the
591 // OBJ is deallocated and later on allocated again (at a different device
592 // address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin
593 // which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ.
594 if (Entry == Device.ShadowPtrMap.end() ||
595 Entry->second.TgtPtrVal != ExpectedTgtPtrBase) {
596 // create or update shadow pointers for this entry
597 Device.ShadowPtrMap[PointerHstPtrBegin] = {
598 HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase};
599 PointerTpr.Entry->setMayContainAttachedPointers();
600 UpdateDevPtr = true;
601 }
602
603 if (UpdateDevPtr) {
604 std::lock_guard<decltype(*PointerTpr.Entry)> LG(*PointerTpr.Entry);
605 Device.ShadowMtx.unlock();
606
607 DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",{}
608 DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)){};
609
610 void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
611 TgtPtrBase = ExpectedTgtPtrBase;
612
613 int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase,
614 sizeof(void *), AsyncInfo);
615 if (Ret != OFFLOAD_SUCCESS) {
616 REPORT("Copying data to device failed.\n");
617 return OFFLOAD_FAIL;
618 }
619 if (PointerTpr.Entry->addEventIfNecessary(Device, AsyncInfo) !=
620 OFFLOAD_SUCCESS)
621 return OFFLOAD_FAIL;
622 } else
623 Device.ShadowMtx.unlock();
624 }
625 }
626
627 return OFFLOAD_SUCCESS;
628}
629
630namespace {
631/// This structure contains information to deallocate a target pointer, aka.
632/// used to fix up the shadow map and potentially delete the entry from the
633/// mapping table via \p DeviceTy::deallocTgtPtr.
634struct PostProcessingInfo {
635 /// Host pointer used to look up into the map table
636 void *HstPtrBegin;
637
638 /// Size of the data
639 int64_t DataSize;
640
641 /// The mapping type (bitfield).
642 int64_t ArgType;
643
644 /// The target pointer information.
645 TargetPointerResultTy TPR;
646
647 /// Are we expecting to delete this entry or not. Even if set, we might not
648 /// delete the entry if another thread reused the entry in the meantime.
649 bool DelEntry;
650
651 PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, bool DelEntry,
652 TargetPointerResultTy TPR)
653 : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), TPR(TPR),
654 DelEntry(DelEntry) {}
655};
656
657/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to
658/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the
659/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the
660/// rest of the map is not checked anymore.
661template <typename CBTy>
662static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin,
663 uintptr_t Size,
664 const TargetPointerResultTy &TPR) {
665 // If we have an object that is too small to hold a pointer subobject, no need
666 // to do any checking.
667 if (Size < sizeof(void *))
668 return;
669
670 // If the map entry for the object was never marked as containing attached
671 // pointers, no need to do any checking.
672 if (!TPR.Entry || !TPR.Entry->getMayContainAttachedPointers())
673 return;
674
675 uintptr_t LB = (uintptr_t)Begin;
676 uintptr_t UB = LB + Size;
677 // Now we are looking into the shadow map so we need to lock it.
678 std::lock_guard<decltype(Device.ShadowMtx)> LG(Device.ShadowMtx);
679 for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin();
680 Itr != Device.ShadowPtrMap.end();) {
681 uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first;
682
683 // An STL map is sorted on its keys; use this property
684 // to quickly determine when to break out of the loop.
685 if (ShadowHstPtrAddr < LB) {
686 ++Itr;
687 continue;
688 }
689 if (ShadowHstPtrAddr >= UB)
690 break;
691
692 if (CB(Itr) == OFFLOAD_FAIL)
693 break;
694 }
695}
696
697} // namespace
698
699/// Internal function to undo the mapping and retrieve the data from the device.
700int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
701 void **ArgBases, void **Args, int64_t *ArgSizes,
702 int64_t *ArgTypes, map_var_info_t *ArgNames,
703 void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
704 int Ret;
705 SmallVector<PostProcessingInfo> PostProcessingPtrs;
706 void *FromMapperBase = nullptr;
707 // process each input.
708 for (int32_t I = ArgNum - 1; I >= 0; --I) {
709 // Ignore private variables and arrays - there is no mapping for them.
710 // Also, ignore the use_device_ptr directive, it has no effect here.
711 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
712 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
713 continue;
714
715 if (ArgMappers && ArgMappers[I]) {
716 // Instead of executing the regular path of targetDataEnd, call the
717 // targetDataMapper variant which will call targetDataEnd again
718 // with new arguments.
719 DP("Calling targetDataMapper for the %dth argument\n", I){};
720
721 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
722 Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
723 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
724 targetDataEnd);
725
726 if (Ret != OFFLOAD_SUCCESS) {
727 REPORT("Call to targetDataEnd via targetDataMapper for custom mapper"
728 " failed.\n");
729 return OFFLOAD_FAIL;
730 }
731
732 // Skip the rest of this function, continue to the next argument.
733 continue;
734 }
735
736 void *HstPtrBegin = Args[I];
737 int64_t DataSize = ArgSizes[I];
738 // Adjust for proper alignment if this is a combined entry (for structs).
739 // Look at the next argument - if that is MEMBER_OF this one, then this one
740 // is a combined entry.
741 const int NextI = I + 1;
742 if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
743 getParentIndex(ArgTypes[NextI]) == I) {
744 int64_t Padding = (int64_t)HstPtrBegin % Alignment;
745 if (Padding) {
746 DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD{}
747 "\n",{}
748 Padding, DPxPTR(HstPtrBegin)){};
749 HstPtrBegin = (char *)HstPtrBegin - Padding;
750 DataSize += Padding;
751 }
752 }
753
754 bool IsLast, IsHostPtr;
755 bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
756 bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
757 (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
758 !(FromMapper && I == 0);
759 bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
760 bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
761 bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
762
763 // If PTR_AND_OBJ, HstPtrBegin is address of pointee
764 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
765 HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr,
766 !IsImplicit, ForceDelete);
767 void *TgtPtrBegin = TPR.TargetPointer;
768 if (!TPR.isPresent() && !TPR.isHostPointer() &&
769 (DataSize || HasPresentModifier)) {
770 DP("Mapping does not exist (%s)\n",{}
771 (HasPresentModifier ? "'present' map type modifier" : "ignored")){};
772 if (HasPresentModifier) {
773 // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 350 L10-13:
774 // "If a map clause appears on a target, target data, target enter data
775 // or target exit data construct with a present map-type-modifier then
776 // on entry to the region if the corresponding list item does not appear
777 // in the device data environment then an error occurs and the program
778 // terminates."
779 //
780 // This should be an error upon entering an "omp target exit data". It
781 // should not be an error upon exiting an "omp target data" or "omp
782 // target". For "omp target data", Clang thus doesn't include present
783 // modifiers for end calls. For "omp target", we have not found a valid
784 // OpenMP program for which the error matters: it appears that, if a
785 // program can guarantee that data is present at the beginning of an
786 // "omp target" region so that there's no error there, that data is also
787 // guaranteed to be present at the end.
788 MESSAGE("device mapping required by 'present' map type modifier does "do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' map type modifier does "
"not exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), DataSize); } while (0)
789 "not exist for host address " DPxMOD " (%" PRId64 " bytes)",do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' map type modifier does "
"not exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), DataSize); } while (0)
790 DPxPTR(HstPtrBegin), DataSize)do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' map type modifier does "
"not exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), DataSize); } while (0)
;
791 return OFFLOAD_FAIL(~0);
792 }
793 } else {
794 DP("There are %" PRId64 " bytes allocated at target address " DPxMOD{}
795 " - is%s last\n",{}
796 DataSize, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not")){};
797 }
798
799 // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16:
800 // "If the map clause appears on a target, target data, or target exit data
801 // construct and a corresponding list item of the original list item is not
802 // present in the device data environment on exit from the region then the
803 // list item is ignored."
804 if (!TPR.isPresent())
805 continue;
806
807 bool DelEntry = IsLast;
808
809 // If the last element from the mapper (for end transfer args comes in
810 // reverse order), do not remove the partial entry, the parent struct still
811 // exists.
812 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
813 !(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
814 DelEntry = false; // protect parent struct from being deallocated
815 }
816
817 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
818 // Move data back to the host
819 if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
820 bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
821 if ((Always || IsLast) && !IsHostPtr) {
822 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",{}
823 DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)){};
824
825 std::lock_guard<decltype(*TPR.Entry)> LG(*TPR.Entry);
826 // Wait for any previous transfer if an event is present.
827 if (void *Event = TPR.Entry->getEvent()) {
828 if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
829 REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
830 return OFFLOAD_FAIL;
831 }
832 }
833
834 Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize,
835 AsyncInfo);
836 if (Ret != OFFLOAD_SUCCESS) {
837 REPORT("Copying data from device failed.\n");
838 return OFFLOAD_FAIL;
839 }
840
841 // As we are expecting to delete the entry the d2h copy might race
842 // with another one that also tries to delete the entry. This happens
843 // as the entry can be reused and the reuse might happen after the
844 // copy-back was issued but before it completed. Since the reuse might
845 // also copy-back a value we would race.
846 if (IsLast) {
847 if (TPR.Entry->addEventIfNecessary(Device, AsyncInfo) !=
848 OFFLOAD_SUCCESS)
849 return OFFLOAD_FAIL;
850 }
851 }
852 }
853 if (DelEntry && FromMapper && I == 0) {
854 DelEntry = false;
855 FromMapperBase = HstPtrBegin;
856 }
857
858 // Add pointer to the buffer for post-synchronize processing.
859 PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I],
860 DelEntry && !IsHostPtr, TPR);
861 }
862 }
863
864 // TODO: We should not synchronize here but pass the AsyncInfo object to the
865 // allocate/deallocate device APIs.
866 //
867 // We need to synchronize before deallocating data.
868 Ret = AsyncInfo.synchronize();
869 if (Ret != OFFLOAD_SUCCESS)
870 return OFFLOAD_FAIL;
871
872 // Deallocate target pointer
873 for (PostProcessingInfo &Info : PostProcessingPtrs) {
874 // If we marked the entry to be deleted we need to verify no other thread
875 // reused it by now. If deletion is still supposed to happen by this thread
876 // LR will be set and exclusive access to the HDTT map will avoid another
877 // thread reusing the entry now. Note that we do not request (exclusive)
878 // access to the HDTT map if Info.DelEntry is not set.
879 LookupResult LR;
880 DeviceTy::HDTTMapAccessorTy HDTTMap =
881 Device.HostDataToTargetMap.getExclusiveAccessor(!Info.DelEntry);
882
883 if (Info.DelEntry) {
884 LR = Device.lookupMapping(HDTTMap, Info.HstPtrBegin, Info.DataSize);
885 if (LR.Entry->getTotalRefCount() != 0 ||
886 LR.Entry->getDeleteThreadId() != std::this_thread::get_id()) {
887 // The thread is not in charge of deletion anymore. Give up access to
888 // the HDTT map and unset the deletion flag.
889 HDTTMap.destroy();
890 Info.DelEntry = false;
891 }
892 }
893
894 // If we copied back to the host a struct/array containing pointers, we
895 // need to restore the original host pointer values from their shadow
896 // copies. If the struct is going to be deallocated, remove any remaining
897 // shadow pointer entries for this struct.
898 auto CB = [&](ShadowPtrListTy::iterator &Itr) {
899 // If we copied the struct to the host, we need to restore the pointer.
900 if (Info.ArgType & OMP_TGT_MAPTYPE_FROM) {
901 void **ShadowHstPtrAddr = (void **)Itr->first;
902 *ShadowHstPtrAddr = Itr->second.HstPtrVal;
903 DP("Restoring original host pointer value " DPxMOD " for host "{}
904 "pointer " DPxMOD "\n",{}
905 DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)){};
906 }
907 // If the struct is to be deallocated, remove the shadow entry.
908 if (Info.DelEntry) {
909 DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)){};
910 auto OldItr = Itr;
911 Itr++;
912 Device.ShadowPtrMap.erase(OldItr);
913 } else {
914 ++Itr;
915 }
916 return OFFLOAD_SUCCESS;
917 };
918 applyToShadowMapEntries(Device, CB, Info.HstPtrBegin, Info.DataSize,
919 Info.TPR);
920
921 // If we are deleting the entry the DataMapMtx is locked and we own the
922 // entry.
923 if (Info.DelEntry) {
924 if (!FromMapperBase || FromMapperBase != Info.HstPtrBegin)
925 Ret = Device.deallocTgtPtr(HDTTMap, LR, Info.DataSize);
926
927 if (Ret != OFFLOAD_SUCCESS) {
928 REPORT("Deallocating data from device failed.\n");
929 break;
930 }
931 }
932 }
933
934 return Ret;
935}
936
937static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
938 void *HstPtrBegin, int64_t ArgSize,
939 int64_t ArgType, AsyncInfoTy &AsyncInfo) {
940 TIMESCOPE_WITH_IDENT(Loc);
941 bool IsLast, IsHostPtr;
942 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
943 HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false,
944 /*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true);
945 void *TgtPtrBegin = TPR.TargetPointer;
946 if (!TPR.isPresent()) {
947 DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)){};
948 if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
949 MESSAGE("device mapping required by 'present' motion modifier does not "do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' motion modifier does not "
"exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), ArgSize); } while (0)
950 "exist for host address " DPxMOD " (%" PRId64 " bytes)",do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' motion modifier does not "
"exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), ArgSize); } while (0)
951 DPxPTR(HstPtrBegin), ArgSize)do { fprintf(stderr, "Libomptarget" " message: " "device mapping required by 'present' motion modifier does not "
"exist for host address " "0x%0*" "l" "x" " (%" "l" "d" " bytes)"
"\n", ((int)(2 * sizeof(uintptr_t))), ((uintptr_t)(HstPtrBegin
)), ArgSize); } while (0)
;
952 return OFFLOAD_FAIL(~0);
953 }
954 return OFFLOAD_SUCCESS;
955 }
956
957 if (IsHostPtr) {
958 DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",{}
959 DPxPTR(HstPtrBegin)){};
960 return OFFLOAD_SUCCESS(0);
961 }
962
963 if (ArgType & OMP_TGT_MAPTYPE_FROM) {
964 DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",{}
965 ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)){};
966 int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo);
967 if (Ret != OFFLOAD_SUCCESS) {
968 REPORT("Copying data from device failed.\n");
969 return OFFLOAD_FAIL;
970 }
971
972 auto CB = [&](ShadowPtrListTy::iterator &Itr) {
973 void **ShadowHstPtrAddr = (void **)Itr->first;
974 // Wait for device-to-host memcopies for whole struct to complete,
975 // before restoring the correct host pointer.
976 if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS)
977 return OFFLOAD_FAIL;
978 *ShadowHstPtrAddr = Itr->second.HstPtrVal;
979 DP("Restoring original host pointer value " DPxMOD
980 " for host pointer " DPxMOD "\n",
981 DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
982 ++Itr;
983 return OFFLOAD_SUCCESS;
984 };
985 applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
986 }
987
988 if (ArgType & OMP_TGT_MAPTYPE_TO) {
989 DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",{}
990 ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)){};
991 int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo);
992 if (Ret != OFFLOAD_SUCCESS) {
993 REPORT("Copying data to device failed.\n");
994 return OFFLOAD_FAIL;
995 }
996
997 auto CB = [&](ShadowPtrListTy::iterator &Itr) {
998 DP("Restoring original target pointer value " DPxMOD " for target "{}
999 "pointer " DPxMOD "\n",{}
1000 DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr)){};
1001 Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal,
1002 sizeof(void *), AsyncInfo);
1003 if (Ret != OFFLOAD_SUCCESS)
1004 REPORT("Copying data to device failed.\n");
1005 ++Itr;
1006 return Ret;
1007 };
1008 applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR);
1009 }
1010 return OFFLOAD_SUCCESS;
1011}
1012
1013static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device,
1014 void *ArgsBase,
1015 __tgt_target_non_contig *NonContig,
1016 uint64_t Size, int64_t ArgType,
1017 int CurrentDim, int DimSize, uint64_t Offset,
1018 AsyncInfoTy &AsyncInfo) {
1019 TIMESCOPE_WITH_IDENT(Loc);
1020 int Ret = OFFLOAD_SUCCESS;
1021 if (CurrentDim < DimSize) {
1022 for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
1023 uint64_t CurOffset =
1024 (NonContig[CurrentDim].Offset + I) * NonContig[CurrentDim].Stride;
1025 // we only need to transfer the first element for the last dimension
1026 // since we've already got a contiguous piece.
1027 if (CurrentDim != DimSize - 1 || I == 0) {
1028 Ret = targetDataNonContiguous(Loc, Device, ArgsBase, NonContig, Size,
1029 ArgType, CurrentDim + 1, DimSize,
1030 Offset + CurOffset, AsyncInfo);
1031 // Stop the whole process if any contiguous piece returns anything
1032 // other than OFFLOAD_SUCCESS.
1033 if (Ret != OFFLOAD_SUCCESS)
1034 return Ret;
1035 }
1036 }
1037 } else {
1038 char *Ptr = (char *)ArgsBase + Offset;
1039 DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64{}
1040 " len %" PRIu64 "\n",{}
1041 DPxPTR(Ptr), Offset, Size){};
1042 Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType,
1043 AsyncInfo);
1044 }
1045 return Ret;
1046}
1047
1048static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
1049 int32_t DimSize) {
1050 int RemovedDim = 0;
1051 for (int I = DimSize - 1; I > 0; --I) {
1052 if (NonContig[I].Count * NonContig[I].Stride == NonContig[I - 1].Stride)
1053 RemovedDim++;
1054 }
1055 return RemovedDim;
1056}
1057
1058/// Internal function to pass data to/from the target.
1059int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
1060 void **ArgsBase, void **Args, int64_t *ArgSizes,
1061 int64_t *ArgTypes, map_var_info_t *ArgNames,
1062 void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) {
1063 // process each input.
1064 for (int32_t I = 0; I < ArgNum; ++I) {
1065 if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
1066 (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
1067 continue;
1068
1069 if (ArgMappers && ArgMappers[I]) {
1070 // Instead of executing the regular path of targetDataUpdate, call the
1071 // targetDataMapper variant which will call targetDataUpdate again
1072 // with new arguments.
1073 DP("Calling targetDataMapper for the %dth argument\n", I){};
1074
1075 map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
1076 int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
1077 ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
1078 targetDataUpdate);
1079
1080 if (Ret != OFFLOAD_SUCCESS) {
1081 REPORT("Call to targetDataUpdate via targetDataMapper for custom mapper"
1082 " failed.\n");
1083 return OFFLOAD_FAIL;
1084 }
1085
1086 // Skip the rest of this function, continue to the next argument.
1087 continue;
1088 }
1089
1090 int Ret = OFFLOAD_SUCCESS;
1091
1092 if (ArgTypes[I] & OMP_TGT_MAPTYPE_NON_CONTIG) {
1093 __tgt_target_non_contig *NonContig = (__tgt_target_non_contig *)Args[I];
1094 int32_t DimSize = ArgSizes[I];
1095 uint64_t Size =
1096 NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
1097 int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
1098 Ret = targetDataNonContiguous(
1099 Loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
1100 /*current_dim=*/0, DimSize - MergedDim, /*offset=*/0, AsyncInfo);
1101 } else {
1102 Ret = targetDataContiguous(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
1103 ArgTypes[I], AsyncInfo);
1104 }
1105 if (Ret == OFFLOAD_FAIL)
1106 return OFFLOAD_FAIL;
1107 }
1108 return OFFLOAD_SUCCESS;
1109}
1110
1111static const unsigned LambdaMapping = OMP_TGT_MAPTYPE_PTR_AND_OBJ |
1112 OMP_TGT_MAPTYPE_LITERAL |
1113 OMP_TGT_MAPTYPE_IMPLICIT;
1114static bool isLambdaMapping(int64_t Mapping) {
1115 return (Mapping & LambdaMapping) == LambdaMapping;
1116}
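// --- Editor's illustrative sketch (not part of omptarget.cpp) ---------------
// isLambdaMapping is an "all bits set" test: a map-type word qualifies only if
// every flag in LambdaMapping is present, not just any one of them. The
// self-contained snippet below demonstrates the pattern with made-up flag
// values; the real values live in the OMP_TGT_MAPTYPE_* enumeration and are
// not reproduced here.
#if 0
#include <cassert>
#include <cstdint>

// Hypothetical flag values, for illustration only.
constexpr int64_t FlagPtrAndObj = 0x10;
constexpr int64_t FlagLiteral = 0x100;
constexpr int64_t FlagImplicit = 0x200;
constexpr int64_t LambdaMask = FlagPtrAndObj | FlagLiteral | FlagImplicit;

static bool isLambdaLike(int64_t Mapping) {
  return (Mapping & LambdaMask) == LambdaMask;
}

int main() {
  assert(isLambdaLike(FlagPtrAndObj | FlagLiteral | FlagImplicit));
  // One missing flag is not enough, even though some bits overlap the mask.
  assert(!isLambdaLike(FlagPtrAndObj | FlagLiteral));
  return 0;
}
#endif
// -----------------------------------------------------------------------------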
1117
1118namespace {
1119/// Find the table information in the map or look it up in the translation
1120/// tables.
1121TableMap *getTableMap(void *HostPtr) {
1122 std::lock_guard<std::mutex> TblMapLock(PM->TblMapMtx);
1123 HostPtrToTableMapTy::iterator TableMapIt =
1124 PM->HostPtrToTableMap.find(HostPtr);
1125
1126 if (TableMapIt != PM->HostPtrToTableMap.end())
1127 return &TableMapIt->second;
1128
1129 // We don't have a map. So search all the registered libraries.
1130 TableMap *TM = nullptr;
1131 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1132 for (HostEntriesBeginToTransTableTy::iterator Itr =
1133 PM->HostEntriesBeginToTransTable.begin();
1134 Itr != PM->HostEntriesBeginToTransTable.end(); ++Itr) {
1135 // get the translation table (which contains all the good info).
1136 TranslationTable *TransTable = &Itr->second;
1137 // iterate over all the host table entries to see if we can locate the
1138 // host_ptr.
1139 __tgt_offload_entry *Cur = TransTable->HostTable.EntriesBegin;
1140 for (uint32_t I = 0; Cur < TransTable->HostTable.EntriesEnd; ++Cur, ++I) {
1141 if (Cur->addr != HostPtr)
1142 continue;
1143 // we got a match, now fill the HostPtrToTableMap so that we
1144 // may avoid this search next time.
1145 TM = &(PM->HostPtrToTableMap)[HostPtr];
1146 TM->Table = TransTable;
1147 TM->Index = I;
1148 return TM;
1149 }
1150 }
1151
1152 return nullptr;
1153}
1154
1155/// A class that manages private arguments in a target region.
1156class PrivateArgumentManagerTy {
1157  /// A data structure holding information about first-private arguments. We
1158  /// use this information to optimize data transfers by packing all
1159  /// first-private arguments and transferring them all at once.
1160 struct FirstPrivateArgInfoTy {
1161 /// The index of the element in \p TgtArgs corresponding to the argument
1162 const int Index;
1163 /// Host pointer begin
1164 const char *HstPtrBegin;
1165 /// Host pointer end
1166 const char *HstPtrEnd;
1167 /// Aligned size
1168 const int64_t AlignedSize;
1169 /// Host pointer name
1170 const map_var_info_t HstPtrName = nullptr;
1171
1172 FirstPrivateArgInfoTy(int Index, const void *HstPtr, int64_t Size,
1173 const map_var_info_t HstPtrName = nullptr)
1174 : Index(Index), HstPtrBegin(reinterpret_cast<const char *>(HstPtr)),
1175 HstPtrEnd(HstPtrBegin + Size), AlignedSize(Size + Size % Alignment),
1176 HstPtrName(HstPtrName) {}
1177 };
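// --- Editor's illustrative sketch (not part of omptarget.cpp) ---------------
// AlignedSize is computed as Size + Size % Alignment; packAndTransfer later
// advances through the packed host buffer in AlignedSize steps, so this value
// determines each argument's slot size in the packed transfer. The standalone
// snippet below merely reproduces that arithmetic for a few sizes. Alignment
// is a constant defined elsewhere in this file; the value 8 used here is an
// assumption for illustration.
#if 0
#include <cstdint>
#include <cstdio>

int main() {
  constexpr int64_t Alignment = 8; // assumed value, see note above
  for (int64_t Size : {8, 10, 24, 30}) {
    int64_t AlignedSize = Size + Size % Alignment;
    std::printf("Size %2lld -> slot of %2lld bytes\n", (long long)Size,
                (long long)AlignedSize);
  }
  return 0;
}
#endif
// -----------------------------------------------------------------------------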
1178
1179 /// A vector of target pointers for all private arguments
1180 SmallVector<void *> TgtPtrs;
1181
1182 /// A vector of information of all first-private arguments to be packed
1183 SmallVector<FirstPrivateArgInfoTy> FirstPrivateArgInfo;
1184 /// Host buffer for all arguments to be packed
1185 SmallVector<char> FirstPrivateArgBuffer;
1186 /// The total size of all arguments to be packed
1187 int64_t FirstPrivateArgSize = 0;
1188
1189 /// A reference to the \p DeviceTy object
1190 DeviceTy &Device;
1191 /// A pointer to a \p AsyncInfoTy object
1192 AsyncInfoTy &AsyncInfo;
1193
1194 // TODO: What would be the best value here? Should we make it configurable?
1195 // If the size is larger than this threshold, we will allocate and transfer it
1196 // immediately instead of packing it.
1197 static constexpr const int64_t FirstPrivateArgSizeThreshold = 1024;
1198
1199public:
1200 /// Constructor
1201 PrivateArgumentManagerTy(DeviceTy &Dev, AsyncInfoTy &AsyncInfo)
1202 : Device(Dev), AsyncInfo(AsyncInfo) {}
1203
1204 /// Add a private argument
1205 int addArg(void *HstPtr, int64_t ArgSize, int64_t ArgOffset,
1206 bool IsFirstPrivate, void *&TgtPtr, int TgtArgsIndex,
1207 const map_var_info_t HstPtrName = nullptr,
1208 const bool AllocImmediately = false) {
1209 // If the argument is not first-private, or its size is greater than a
1210 // predefined threshold, we will allocate memory and issue the transfer
1211 // immediately.
1212 if (ArgSize > FirstPrivateArgSizeThreshold || !IsFirstPrivate ||
1213 AllocImmediately) {
1214 TgtPtr = Device.allocData(ArgSize, HstPtr);
1215 if (!TgtPtr) {
1216        DP("Data allocation for %sprivate array " DPxMOD " failed.\n",
1217           (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtr));
1218        return OFFLOAD_FAIL;
1219 }
1220#ifdef OMPTARGET_DEBUG
1221 void *TgtPtrBase = (void *)((intptr_t)TgtPtr + ArgOffset);
1222      DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD
1223         " for %sprivate array " DPxMOD " - pushing target argument " DPxMOD
1224         "\n",
1225         ArgSize, DPxPTR(TgtPtr), (IsFirstPrivate ? "first-" : ""),
1226         DPxPTR(HstPtr), DPxPTR(TgtPtrBase));
1227#endif
1228 // If first-private, copy data from host
1229 if (IsFirstPrivate) {
1230        DP("Submitting firstprivate data to the device.\n");
1231 int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo);
1232        if (Ret != OFFLOAD_SUCCESS) {
1233          DP("Copying data to device failed.\n");
1234          return OFFLOAD_FAIL;
1235 }
1236 }
1237 TgtPtrs.push_back(TgtPtr);
1238 } else {
1239      DP("Firstprivate array " DPxMOD " of size %" PRId64 " will be packed\n",
1240         DPxPTR(HstPtr), ArgSize);
1241      // When we reach this point, the argument must meet both of the
1242      // following requirements:
1243      // 1. Its size does not exceed the threshold (see the comment for
1244      //    FirstPrivateArgSizeThreshold);
1245      // 2. It must be first-private (it needs to be mapped to the target
1246      //    device).
1247      // We pack all such arguments and transfer them at once to reduce the
1248      // number of data transfers. Non-first-private arguments, i.e. private
1249      // arguments that do not need to be mapped to the target device, are not
1250      // packed because the memory manager already makes their allocation cheap.
1251
1252 // Placeholder value
1253 TgtPtr = nullptr;
1254 FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
1255 HstPtrName);
1256 FirstPrivateArgSize += FirstPrivateArgInfo.back().AlignedSize;
1257 }
1258
1259    return OFFLOAD_SUCCESS;
1260 }
1261
1262  /// Pack first-private arguments, replace placeholder pointers in \p TgtArgs,
1263  /// and start the transfer.
1264 int packAndTransfer(SmallVector<void *> &TgtArgs) {
1265 if (!FirstPrivateArgInfo.empty()) {
1266      assert(FirstPrivateArgSize != 0 &&
1267             "FirstPrivateArgSize is 0 but FirstPrivateArgInfo is not empty");
1268 FirstPrivateArgBuffer.resize(FirstPrivateArgSize, 0);
1269 auto Itr = FirstPrivateArgBuffer.begin();
1270 // Copy all host data to this buffer
1271 for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
1272 std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
1273 Itr = std::next(Itr, Info.AlignedSize);
1274 }
1275 // Allocate target memory
1276 void *TgtPtr =
1277 Device.allocData(FirstPrivateArgSize, FirstPrivateArgBuffer.data());
1278 if (TgtPtr == nullptr) {
1279        DP("Failed to allocate target memory for private arguments.\n");
1280        return OFFLOAD_FAIL;
1281 }
1282 TgtPtrs.push_back(TgtPtr);
1283      DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD "\n",
1284         FirstPrivateArgSize, DPxPTR(TgtPtr));
1285 // Transfer data to target device
1286 int Ret = Device.submitData(TgtPtr, FirstPrivateArgBuffer.data(),
1287 FirstPrivateArgSize, AsyncInfo);
1288      if (Ret != OFFLOAD_SUCCESS) {
1289        DP("Failed to submit data of private arguments.\n");
1290        return OFFLOAD_FAIL;
1291 }
1292 // Fill in all placeholder pointers
1293 auto TP = reinterpret_cast<uintptr_t>(TgtPtr);
1294 for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
1295 void *&Ptr = TgtArgs[Info.Index];
1296        assert(Ptr == nullptr && "Target pointer is already set by mistake");
1297 Ptr = reinterpret_cast<void *>(TP);
1298 TP += Info.AlignedSize;
1299        DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD
1300           "\n",
1301           DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,
1302           DPxPTR(Ptr));
1303 }
1304 }
1305
1306    return OFFLOAD_SUCCESS;
1307 }
1308
1309 /// Free all target memory allocated for private arguments
1310 int free() {
1311 for (void *P : TgtPtrs) {
1312 int Ret = Device.deleteData(P);
1313      if (Ret != OFFLOAD_SUCCESS) {
1314        DP("Deallocation of (first-)private arrays failed.\n");
1315        return OFFLOAD_FAIL;
1316 }
1317 }
1318
1319 TgtPtrs.clear();
1320
1321    return OFFLOAD_SUCCESS;
1322 }
1323};
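// --- Editor's illustrative sketch (not part of omptarget.cpp) ---------------
// The expected lifecycle of PrivateArgumentManagerTy, as used further below in
// processDataBefore and processDataAfter, is: addArg() once per private
// argument, packAndTransfer() once after all arguments are collected, and
// free() after the kernel has finished. The simplified, self-contained mock
// below mirrors only the packing decision (small first-private arguments are
// batched, large or non-first-private ones get their own allocation) and is
// not the real class.
#if 0
#include <cstdint>
#include <cstdio>

class MockPrivateArgManager {
  static constexpr int64_t Threshold = 1024;
  int64_t PackedBytes = 0;
  int ImmediateAllocs = 0;

public:
  void addArg(int64_t Size, bool IsFirstPrivate) {
    if (Size > Threshold || !IsFirstPrivate)
      ++ImmediateAllocs; // the real class calls Device.allocData()/submitData()
    else
      PackedBytes += Size; // deferred until packAndTransfer()
  }
  void packAndTransfer() {
    // One allocation plus one submit for everything that was deferred.
    std::printf("packed transfer of %lld bytes, %d immediate allocations\n",
                (long long)PackedBytes, ImmediateAllocs);
  }
  void free() { /* the real class calls Device.deleteData() per target pointer */ }
};

int main() {
  MockPrivateArgManager PAM;
  PAM.addArg(16, /*IsFirstPrivate=*/true);   // packed
  PAM.addArg(64, /*IsFirstPrivate=*/true);   // packed
  PAM.addArg(4096, /*IsFirstPrivate=*/true); // too large, transferred directly
  PAM.addArg(32, /*IsFirstPrivate=*/false);  // private only, allocated directly
  PAM.packAndTransfer(); // "packed transfer of 80 bytes, 2 immediate allocations"
  PAM.free();
  return 0;
}
#endif
// -----------------------------------------------------------------------------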
1324
1325/// Process data before launching the kernel, including calling targetDataBegin
1326/// to map and transfer data to the target device and transferring the
1327/// (first-)private variables.
1328static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
1329 int32_t ArgNum, void **ArgBases, void **Args,
1330 int64_t *ArgSizes, int64_t *ArgTypes,
1331 map_var_info_t *ArgNames, void **ArgMappers,
1332 SmallVector<void *> &TgtArgs,
1333 SmallVector<ptrdiff_t> &TgtOffsets,
1334 PrivateArgumentManagerTy &PrivateArgumentManager,
1335 AsyncInfoTy &AsyncInfo) {
1336  TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc);
1337 DeviceTy &Device = *PM->Devices[DeviceId];
1338 int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
1339 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
1340  if (Ret != OFFLOAD_SUCCESS) {
1341    REPORT("Call to targetDataBegin failed, abort target.\n");
1342    return OFFLOAD_FAIL;
1343 }
1344
1345 // List of (first-)private arrays allocated for this target region
1346 SmallVector<int> TgtArgsPositions(ArgNum, -1);
1347
1348 for (int32_t I = 0; I < ArgNum; ++I) {
1349 if (!(ArgTypes[I] & OMP_TGT_MAPTYPE_TARGET_PARAM)) {
1350 // This is not a target parameter, do not push it into TgtArgs.
1351 // Check for lambda mapping.
1352 if (isLambdaMapping(ArgTypes[I])) {
1353        assert((ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
1354               "PTR_AND_OBJ must also be MEMBER_OF.");
1355 unsigned Idx = getParentIndex(ArgTypes[I]);
1356 int TgtIdx = TgtArgsPositions[Idx];
1357        assert(TgtIdx != -1 && "Base address must be translated already.");
1358 // The parent lambda must be processed already and it must be the last
1359 // in TgtArgs and TgtOffsets arrays.
1360 void *HstPtrVal = Args[I];
1361 void *HstPtrBegin = ArgBases[I];
1362 void *HstPtrBase = Args[Idx];
1363 bool IsLast, IsHostPtr; // IsLast is unused.
1364 void *TgtPtrBase =
1365 (void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]);
1366        DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
1367 uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
1368 void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
1369 void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation();
1370 TargetPointerResultTy TPR = Device.getTgtPtrBegin(
1371 HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false,
1372 /*UseHoldRefCount=*/false, IsHostPtr);
1373 PointerTgtPtrBegin = TPR.TargetPointer;
1374 if (!TPR.isPresent()) {
1375          DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
1376             DPxPTR(HstPtrVal));
1377 continue;
1378 }
1379 if (IsHostPtr) {
1380          DP("Unified memory is active, no need to map lambda captured "
1381             "variable (" DPxMOD ")\n",
1382             DPxPTR(HstPtrVal));
1383 continue;
1384 }
1385        DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
1386           DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
1387 Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin,
1388 sizeof(void *), AsyncInfo);
1389        if (Ret != OFFLOAD_SUCCESS) {
1390          REPORT("Copying data to device failed.\n");
1391          return OFFLOAD_FAIL;
1392 }
1393 }
1394 continue;
1395 }
1396 void *HstPtrBegin = Args[I];
1397 void *HstPtrBase = ArgBases[I];
1398 void *TgtPtrBegin;
1399 map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
1400 ptrdiff_t TgtBaseOffset;
1401 bool IsLast, IsHostPtr; // unused.
1402 TargetPointerResultTy TPR;
1403 if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) {
1404      DP("Forwarding first-private value " DPxMOD " to the target construct\n",
1405         DPxPTR(HstPtrBase));
1406 TgtPtrBegin = HstPtrBase;
1407 TgtBaseOffset = 0;
1408 } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
1409 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
1410 const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
1411 // If there is a next argument and it depends on the current one, we need
1412 // to allocate the private memory immediately. If this is not the case,
1413 // then the argument can be marked for optimization and packed with the
1414 // other privates.
1415 const bool AllocImmediately =
1416 (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
1417 Ret = PrivateArgumentManager.addArg(
1418 HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
1419 TgtArgs.size(), HstPtrName, AllocImmediately);
1420      if (Ret != OFFLOAD_SUCCESS) {
1421        REPORT("Failed to process %sprivate argument " DPxMOD "\n",
1422               (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
1423        return OFFLOAD_FAIL;
1424 }
1425 } else {
1426 if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)
1427 HstPtrBase = *reinterpret_cast<void **>(HstPtrBase);
1428 TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast,
1429 /*UpdateRefCount=*/false,
1430 /*UseHoldRefCount=*/false, IsHostPtr);
1431 TgtPtrBegin = TPR.TargetPointer;
1432 TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
1433#ifdef OMPTARGET_DEBUG
1434 void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);
1435      DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD "\n",
1436         DPxPTR(TgtPtrBase), DPxPTR(HstPtrBegin));
1437#endif
1438 }
1439 TgtArgsPositions[I] = TgtArgs.size();
1440 TgtArgs.push_back(TgtPtrBegin);
1441 TgtOffsets.push_back(TgtBaseOffset);
1442 }
1443
1444  assert(TgtArgs.size() == TgtOffsets.size() &&
1445         "Size mismatch in arguments and offsets");
1446
1447 // Pack and transfer first-private arguments
1448 Ret = PrivateArgumentManager.packAndTransfer(TgtArgs);
1449  if (Ret != OFFLOAD_SUCCESS) {
1450    DP("Failed to pack and transfer first-private arguments\n");
1451    return OFFLOAD_FAIL;
1452 }
1453
1454  return OFFLOAD_SUCCESS;
1455}
1456
1457/// Process data after launching the kernel, including transferring data back to
1458/// the host if needed and deallocating target memory for (first-)private variables.
1459static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
1460 int32_t ArgNum, void **ArgBases, void **Args,
1461 int64_t *ArgSizes, int64_t *ArgTypes,
1462 map_var_info_t *ArgNames, void **ArgMappers,
1463 PrivateArgumentManagerTy &PrivateArgumentManager,
1464 AsyncInfoTy &AsyncInfo) {
1465  TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc);
1466 DeviceTy &Device = *PM->Devices[DeviceId];
1467
1468 // Move data from device.
1469 int Ret = targetDataEnd(Loc, Device, ArgNum, ArgBases, Args, ArgSizes,
1470 ArgTypes, ArgNames, ArgMappers, AsyncInfo);
1471  if (Ret != OFFLOAD_SUCCESS) {
1472    REPORT("Call to targetDataEnd failed, abort target.\n");
1473    return OFFLOAD_FAIL;
1474 }
1475
1476 // Free target memory for private arguments
1477 Ret = PrivateArgumentManager.free();
1478  if (Ret != OFFLOAD_SUCCESS) {
1479    REPORT("Failed to deallocate target memory for private args\n");
1480    return OFFLOAD_FAIL;
1481 }
1482
1483  return OFFLOAD_SUCCESS;
1484}
1485} // namespace
1486
1487/// Performs the same actions as data_begin when arg_num is non-zero and
1488/// initiates the run of the offloaded region on the target platform; if
1489/// arg_num is non-zero, after the region execution is done it also performs
1490/// the same actions as data_update and data_end above. This function returns
1491/// 0 if it was able to transfer the execution to a target and a non-zero
1492/// integer otherwise.
1493int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum,
1494 void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
1495 map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum,
1496 int32_t ThreadLimit, uint64_t Tripcount, int IsTeamConstruct,
1497 AsyncInfoTy &AsyncInfo) {
1498 int32_t DeviceId = Device.DeviceID;
1499 TableMap *TM = getTableMap(HostPtr);
1500 // No map for this host pointer found!
1501 if (!TM) {
1502    REPORT("Host ptr " DPxMOD " does not have a matching target pointer.\n",
1503           DPxPTR(HostPtr));
1504    return OFFLOAD_FAIL;
1505 }
1506
1507 // get target table.
1508 __tgt_target_table *TargetTable = nullptr;
1509 {
1510 std::lock_guard<std::mutex> TrlTblLock(PM->TrlTblMtx);
1511    assert(TM->Table->TargetsTable.size() > (size_t)DeviceId &&
1512           "Not expecting a device ID outside the table's bounds!");
1513 TargetTable = TM->Table->TargetsTable[DeviceId];
1514 }
1515  assert(TargetTable && "Global data has not been mapped\n");
1516
1517  DP("loop trip count is %" PRIu64 ".\n", Tripcount);
1518
1519 // We need to keep bases and offsets separate. Sometimes (e.g. in OpenCL) we
1520 // need to manifest base pointers prior to launching a kernel. Even if we have
1521 // mapped an object only partially, e.g. A[N:M], although the kernel is
1522 // expected to access elements starting at address &A[N] and beyond, we still
1523 // need to manifest the base of the array &A[0]. In other cases, e.g. the COI
1524 // API, we need the begin address itself, i.e. &A[N], as the API operates on
1525 // begin addresses, not bases. That's why we pass args and offsets as two
1526 // separate entities so that each plugin can do what it needs. This behavior
1527  // was introduced via https://reviews.llvm.org/D33028 and commit 1546d319244c.
1528 SmallVector<void *> TgtArgs;
1529 SmallVector<ptrdiff_t> TgtOffsets;
1530
1531 PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo);
1532
1533 int Ret;
1534 if (ArgNum) {
1535 // Process data, such as data mapping, before launching the kernel
1536 Ret = processDataBefore(Loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
1537 ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs,
1538 TgtOffsets, PrivateArgumentManager, AsyncInfo);
1539    if (Ret != OFFLOAD_SUCCESS) {
1540      REPORT("Failed to process data before launching the kernel.\n");
1541      return OFFLOAD_FAIL;
1542 }
1543 }
1544
1545 // Launch device execution.
1546 void *TgtEntryPtr = TargetTable->EntriesBegin[TM->Index].addr;
1547  DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n",
1548     TargetTable->EntriesBegin[TM->Index].name, DPxPTR(TgtEntryPtr), TM->Index);
1549
1550 {
1551    TIMESCOPE_WITH_NAME_AND_IDENT(
1552        IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", Loc);
1553 if (IsTeamConstruct)
1554 Ret = Device.runTeamRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
1555 TgtArgs.size(), TeamNum, ThreadLimit,
1556 Tripcount, AsyncInfo);
1557 else
1558 Ret = Device.runRegion(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),
1559 TgtArgs.size(), AsyncInfo);
1560 }
1561
1562  if (Ret != OFFLOAD_SUCCESS) {
1563    REPORT("Executing target region failed, abort target.\n");
1564    return OFFLOAD_FAIL;
1565 }
1566
1567 if (ArgNum) {
1568 // Transfer data back and deallocate target memory for (first-)private
1569 // variables
1570 Ret = processDataAfter(Loc, DeviceId, HostPtr, ArgNum, ArgBases, Args,
1571 ArgSizes, ArgTypes, ArgNames, ArgMappers,
1572 PrivateArgumentManager, AsyncInfo);
1573    if (Ret != OFFLOAD_SUCCESS) {
1574      REPORT("Failed to process data after launching the kernel.\n");
1575      return OFFLOAD_FAIL;
1576 }
1577 }
1578
1579  return OFFLOAD_SUCCESS;
1580}
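// --- Editor's illustrative sketch (not part of omptarget.cpp) ---------------
// The comment inside target() above (source lines 1519-1527) explains why the
// runtime hands plugins both a begin address (TgtArgs) and a base offset
// (TgtOffsets). The standalone snippet below walks through the arithmetic for
// a partial map such as A[N:M], on the host side only: TgtBaseOffset is the
// (typically negative) distance from the mapped begin back to the base, so a
// plugin can recompute the base as begin + offset when it needs &A[0]. All
// values are made up for illustration.
#if 0
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  double A[100];
  const int N = 25;

  // What the runtime would see for a map of A[25:50]:
  void *HstPtrBase = &A[0];  // base of the array object
  void *HstPtrBegin = &A[N]; // first mapped element
  std::ptrdiff_t TgtBaseOffset =
      (std::intptr_t)HstPtrBase - (std::intptr_t)HstPtrBegin; // -25 * sizeof(double)

  // A plugin that needs the base (e.g. to manifest &A[0]) recovers it from the
  // begin pointer plus the recorded offset.
  void *RecoveredBase = (void *)((std::intptr_t)HstPtrBegin + TgtBaseOffset);
  std::printf("offset = %td bytes, base recovered: %s\n", TgtBaseOffset,
              RecoveredBase == HstPtrBase ? "yes" : "no");
  return 0;
}
#endif
// -----------------------------------------------------------------------------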