5
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
6
//
7
7
// ===----------------------------------------------------------------------===//
8
- #include " hsa_api.h"
9
- #include " impl_runtime.h"
10
- #include " internal.h"
11
8
#include " rt.h"
12
9
#include < memory>
13
10
14
11
/*
15
12
* Data
16
13
*/
17
14
18
- static hsa_status_t invoke_hsa_copy (hsa_signal_t sig, void *dest,
19
- const void *src, size_t size,
20
- hsa_agent_t agent) {
15
+ // host pointer (either src or dest) must be locked via hsa_amd_memory_lock
16
+ static hsa_status_t invoke_hsa_copy (hsa_signal_t signal, void *dest,
17
+ hsa_agent_t agent, const void *src,
18
+ size_t size) {
21
19
const hsa_signal_value_t init = 1 ;
22
20
const hsa_signal_value_t success = 0 ;
23
- hsa_signal_store_screlease (sig , init);
21
+ hsa_signal_store_screlease (signal , init);
24
22
25
- hsa_status_t err =
26
- hsa_amd_memory_async_copy (dest, agent, src, agent, size, 0 , NULL , sig );
27
- if (err != HSA_STATUS_SUCCESS) {
23
+ hsa_status_t err = hsa_amd_memory_async_copy (dest, agent, src, agent, size, 0 ,
24
+ nullptr , signal );
25
+ if (err != HSA_STATUS_SUCCESS)
28
26
return err;
29
- }
30
27
31
28
// async_copy reports success by decrementing and failure by setting to < 0
32
29
hsa_signal_value_t got = init;
33
- while (got == init) {
34
- got = hsa_signal_wait_scacquire (sig , HSA_SIGNAL_CONDITION_NE, init,
30
+ while (got == init)
31
+ got = hsa_signal_wait_scacquire (signal , HSA_SIGNAL_CONDITION_NE, init,
35
32
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
36
- }
37
33
38
- if (got != success) {
34
+ if (got != success)
39
35
return HSA_STATUS_ERROR;
40
- }
41
36
42
37
return err;
43
38
}
@@ -48,19 +43,58 @@ struct implFreePtrDeletor {
48
43
}
49
44
};
50
45
46
+ enum CopyDirection { H2D, D2H };
47
+
48
+ static hsa_status_t locking_async_memcpy (enum CopyDirection direction,
49
+ hsa_signal_t signal, void *dest,
50
+ hsa_agent_t agent, void *src,
51
+ void *lockingPtr, size_t size) {
52
+ hsa_status_t err;
53
+
54
+ void *lockedPtr = nullptr ;
55
+ err = hsa_amd_memory_lock (lockingPtr, size, nullptr , 0 , (void **)&lockedPtr);
56
+ if (err != HSA_STATUS_SUCCESS)
57
+ return err;
58
+
59
+ switch (direction) {
60
+ case H2D:
61
+ err = invoke_hsa_copy (signal, dest, agent, lockedPtr, size);
62
+ break ;
63
+ case D2H:
64
+ err = invoke_hsa_copy (signal, lockedPtr, agent, src, size);
65
+ break ;
66
+ default :
67
+ err = HSA_STATUS_ERROR; // fall into unlock before returning
68
+ }
69
+
70
+ if (err != HSA_STATUS_SUCCESS) {
71
+ // do not leak the locked host pointer; the unlock status is deliberately ignored so the original error is returned
72
+ hsa_amd_memory_unlock (lockingPtr);
73
+ return err;
74
+ }
75
+
76
+ err = hsa_amd_memory_unlock (lockingPtr);
77
+ if (err != HSA_STATUS_SUCCESS)
78
+ return err;
79
+
80
+ return HSA_STATUS_SUCCESS;
81
+ }
82
+
51
83
hsa_status_t impl_memcpy_h2d (hsa_signal_t signal, void *deviceDest,
52
- const void *hostSrc, size_t size,
53
- hsa_agent_t agent ,
84
+ void *hostSrc, size_t size,
85
+ hsa_agent_t device_agent ,
54
86
hsa_amd_memory_pool_t MemoryPool) {
55
- hsa_status_t rc = hsa_memory_copy (deviceDest, hostSrc, size) ;
87
+ hsa_status_t err ;
56
88
57
- // hsa_memory_copy sometimes fails in situations where
89
+ err = locking_async_memcpy (CopyDirection::H2D, signal, deviceDest,
90
+ device_agent, hostSrc, hostSrc, size);
91
+
92
+ if (err == HSA_STATUS_SUCCESS)
93
+ return err;
94
+
95
+ // async memcpy sometimes fails in situations where
58
96
// allocate + copy succeeds. Looks like it might be related to
59
97
// locking part of a read only segment. Fall back for now.
60
- if (rc == HSA_STATUS_SUCCESS) {
61
- return HSA_STATUS_SUCCESS;
62
- }
63
-
64
98
void *tempHostPtr;
65
99
hsa_status_t ret = core::Runtime::HostMalloc (&tempHostPtr, size, MemoryPool);
66
100
if (ret != HSA_STATUS_SUCCESS) {
@@ -70,26 +104,26 @@ hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
70
104
std::unique_ptr<void , implFreePtrDeletor> del (tempHostPtr);
71
105
memcpy (tempHostPtr, hostSrc, size);
72
106
73
- if (invoke_hsa_copy (signal, deviceDest, tempHostPtr, size, agent) !=
74
- HSA_STATUS_SUCCESS) {
75
- return HSA_STATUS_ERROR;
76
- }
77
- return HSA_STATUS_SUCCESS;
107
+ return locking_async_memcpy (CopyDirection::H2D, signal, deviceDest,
108
+ device_agent, tempHostPtr, tempHostPtr, size);
78
109
}
79
110
80
- hsa_status_t impl_memcpy_d2h (hsa_signal_t signal, void *dest ,
81
- const void *deviceSrc, size_t size,
82
- hsa_agent_t agent ,
111
+ hsa_status_t impl_memcpy_d2h (hsa_signal_t signal, void *hostDest ,
112
+ void *deviceSrc, size_t size,
113
+ hsa_agent_t deviceAgent ,
83
114
hsa_amd_memory_pool_t MemoryPool) {
84
- hsa_status_t rc = hsa_memory_copy (dest, deviceSrc, size);
115
+ hsa_status_t err;
116
+
117
+ // the device always has visibility over both pointers, so use its agent
118
+ err = locking_async_memcpy (CopyDirection::D2H, signal, hostDest, deviceAgent,
119
+ deviceSrc, hostDest, size);
120
+
121
+ if (err == HSA_STATUS_SUCCESS)
122
+ return err;
85
123
86
124
// hsa_memory_copy sometimes fails in situations where
87
125
// allocate + copy succeeds. Looks like it might be related to
88
126
// locking part of a read only segment. Fall back for now.
89
- if (rc == HSA_STATUS_SUCCESS) {
90
- return HSA_STATUS_SUCCESS;
91
- }
92
-
93
127
void *tempHostPtr;
94
128
hsa_status_t ret = core::Runtime::HostMalloc (&tempHostPtr, size, MemoryPool);
95
129
if (ret != HSA_STATUS_SUCCESS) {
@@ -98,11 +132,11 @@ hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *dest,
98
132
}
99
133
std::unique_ptr<void , implFreePtrDeletor> del (tempHostPtr);
100
134
101
- if (invoke_hsa_copy (signal, tempHostPtr, deviceSrc, size, agent) !=
102
- HSA_STATUS_SUCCESS) {
135
+ err = locking_async_memcpy (CopyDirection::D2H, signal, tempHostPtr,
136
+ deviceAgent, deviceSrc, tempHostPtr, size);
137
+ if (err != HSA_STATUS_SUCCESS)
103
138
return HSA_STATUS_ERROR;
104
- }
105
139
106
- memcpy (dest , tempHostPtr, size);
140
+ memcpy (hostDest , tempHostPtr, size);
107
141
return HSA_STATUS_SUCCESS;
108
142
}
0 commit comments