MemoryManager.cpp
7.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
//===----------- MemoryManager.cpp - Target independent memory manager ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Functionality for managing target memory.
// It is very expensive to call alloc/free functions of target devices. The
// MemoryManagerTy in this file is to reduce the number of invocations of those
// functions by buffering allocated device memory. In this way, when a memory is
// not used, it will not be freed on the device directly. The buffer is
// organized in a number of buckets for efficient look up. A memory will go to
// corresponding bucket based on its size. When a new memory request comes in,
// it will first check whether there is free memory of same size. If yes,
// returns it directly. Otherwise, allocate one on device.
//
// It also provides a way to opt out the memory manager. Memory
// allocation/deallocation will only be managed if the requested size is less
// than SizeThreshold, which can be configured via an environment variable
// LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD.
//
//===----------------------------------------------------------------------===//
#include "MemoryManager.h"
#include "device.h"
#include "private.h"
#include "rtl.h"
namespace {
constexpr const size_t BucketSize[] = {
0, 1U << 2, 1U << 3, 1U << 4, 1U << 5, 1U << 6, 1U << 7,
1U << 8, 1U << 9, 1U << 10, 1U << 11, 1U << 12, 1U << 13};
constexpr const int NumBuckets = sizeof(BucketSize) / sizeof(BucketSize[0]);
/// The threshold to manage memory using memory manager. If the request size is
/// larger than \p SizeThreshold, the allocation will not be managed by the
/// memory manager. This variable can be configured via an env \p
/// LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD. By default, the value is 8KB.
size_t SizeThreshold = 1U << 13;
/// Find the previous number that is power of 2 given a number that is not power
/// of 2.
size_t floorToPowerOfTwo(size_t Num) {
Num |= Num >> 1;
Num |= Num >> 2;
Num |= Num >> 4;
Num |= Num >> 8;
Num |= Num >> 16;
Num |= Num >> 32;
Num += 1;
return Num >> 1;
}
/// Find a suitable bucket
int findBucket(size_t Size) {
const size_t F = floorToPowerOfTwo(Size);
DP("findBucket: Size %zu is floored to %zu.\n", Size, F);
int L = 0, H = NumBuckets - 1;
while (H - L > 1) {
int M = (L + H) >> 1;
if (BucketSize[M] == F)
return M;
if (BucketSize[M] > F)
H = M - 1;
else
L = M;
}
assert(L >= 0 && L < NumBuckets && "L is out of range");
DP("findBucket: Size %zu goes to bucket %d\n", Size, L);
return L;
}
} // namespace
MemoryManagerTy::MemoryManagerTy(DeviceTy &Dev, size_t Threshold)
: FreeLists(NumBuckets), FreeListLocks(NumBuckets), Device(Dev) {
if (Threshold)
SizeThreshold = Threshold;
}
MemoryManagerTy::~MemoryManagerTy() {
// TODO: There is a little issue that target plugin is destroyed before this
// object, therefore the memory free will not succeed.
// Deallocate all memory in map
for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end(); ++Itr) {
assert(Itr->second.Ptr && "nullptr in map table");
deleteOnDevice(Itr->second.Ptr);
}
}
void *MemoryManagerTy::allocateOnDevice(size_t Size, void *HstPtr) const {
return Device.RTL->data_alloc(Device.RTLDeviceID, Size, HstPtr);
}
int MemoryManagerTy::deleteOnDevice(void *Ptr) const {
return Device.RTL->data_delete(Device.RTLDeviceID, Ptr);
}
void *MemoryManagerTy::freeAndAllocate(size_t Size, void *HstPtr) {
std::vector<void *> RemoveList;
// Deallocate all memory in FreeList
for (int I = 0; I < NumBuckets; ++I) {
FreeListTy &List = FreeLists[I];
std::lock_guard<std::mutex> Lock(FreeListLocks[I]);
if (List.empty())
continue;
for (const NodeTy &N : List) {
deleteOnDevice(N.Ptr);
RemoveList.push_back(N.Ptr);
}
FreeLists[I].clear();
}
// Remove all nodes in the map table which have been released
if (!RemoveList.empty()) {
std::lock_guard<std::mutex> LG(MapTableLock);
for (void *P : RemoveList)
PtrToNodeTable.erase(P);
}
// Try allocate memory again
return allocateOnDevice(Size, HstPtr);
}
void *MemoryManagerTy::allocateOrFreeAndAllocateOnDevice(size_t Size,
void *HstPtr) {
void *TgtPtr = allocateOnDevice(Size, HstPtr);
// We cannot get memory from the device. It might be due to OOM. Let's
// free all memory in FreeLists and try again.
if (TgtPtr == nullptr) {
DP("Failed to get memory on device. Free all memory in FreeLists and "
"try again.\n");
TgtPtr = freeAndAllocate(Size, HstPtr);
}
#ifdef OMPTARGET_DEBUG
if (TgtPtr == nullptr)
DP("Still cannot get memory on device probably because the device is "
"OOM.\n");
#endif
return TgtPtr;
}
void *MemoryManagerTy::allocate(size_t Size, void *HstPtr) {
// If the size is zero, we will not bother the target device. Just return
// nullptr directly.
if (Size == 0)
return nullptr;
DP("MemoryManagerTy::allocate: size %zu with host pointer " DPxMOD ".\n",
Size, DPxPTR(HstPtr));
// If the size is greater than the threshold, allocate it directly from
// device.
if (Size > SizeThreshold) {
DP("%zu is greater than the threshold %zu. Allocate it directly from "
"device\n",
Size, SizeThreshold);
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
return TgtPtr;
}
NodeTy *NodePtr = nullptr;
// Try to get a node from FreeList
{
const int B = findBucket(Size);
FreeListTy &List = FreeLists[B];
NodeTy TempNode(Size, nullptr);
std::lock_guard<std::mutex> LG(FreeListLocks[B]);
FreeListTy::const_iterator Itr = List.find(TempNode);
if (Itr != List.end()) {
NodePtr = &Itr->get();
List.erase(Itr);
}
}
#ifdef OMPTARGET_DEBUG
if (NodePtr != nullptr)
DP("Find one node " DPxMOD " in the bucket.\n", DPxPTR(NodePtr));
#endif
// We cannot find a valid node in FreeLists. Let's allocate on device and
// create a node for it.
if (NodePtr == nullptr) {
DP("Cannot find a node in the FreeLists. Allocate on device.\n");
// Allocate one on device
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
if (TgtPtr == nullptr)
return nullptr;
// Create a new node and add it into the map table
{
std::lock_guard<std::mutex> Guard(MapTableLock);
auto Itr = PtrToNodeTable.emplace(TgtPtr, NodeTy(Size, TgtPtr));
NodePtr = &Itr.first->second;
}
DP("Node address " DPxMOD ", target pointer " DPxMOD ", size %zu\n",
DPxPTR(NodePtr), DPxPTR(TgtPtr), Size);
}
assert(NodePtr && "NodePtr should not be nullptr at this point");
return NodePtr->Ptr;
}
int MemoryManagerTy::free(void *TgtPtr) {
DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
NodeTy *P = nullptr;
// Look it up into the table
{
std::lock_guard<std::mutex> G(MapTableLock);
auto Itr = PtrToNodeTable.find(TgtPtr);
// We don't remove the node from the map table because the map does not
// change.
if (Itr != PtrToNodeTable.end())
P = &Itr->second;
}
// The memory is not managed by the manager
if (P == nullptr) {
DP("Cannot find its node. Delete it on device directly.\n");
return deleteOnDevice(TgtPtr);
}
// Insert the node to the free list
const int B = findBucket(P->Size);
DP("Found its node " DPxMOD ". Insert it to bucket %d.\n", DPxPTR(P), B);
{
std::lock_guard<std::mutex> G(FreeListLocks[B]);
FreeLists[B].insert(*P);
}
return OFFLOAD_SUCCESS;
}