atmi.h
5.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/*===--------------------------------------------------------------------------
* ATMI (Asynchronous Task and Memory Interface)
*
* This file is distributed under the MIT License. See LICENSE.txt for details.
*===------------------------------------------------------------------------*/
#ifndef INCLUDE_ATMI_H_
#define INCLUDE_ATMI_H_
/**
* Version number split into major and minor components (3.2).
* NOTE(review): presumably the ROCm release this header targets -- confirm.
*/
#define ROCM_VERSION_MAJOR 3
#define ROCM_VERSION_MINOR 2
/** \defgroup enumerations Enumerated Types
* @{
*/
/**
* @brief Status codes describing the outcome of a function call.
*/
typedef enum atmi_status_t {
  ATMI_STATUS_SUCCESS = 0, /**< The function has been executed successfully. */
  ATMI_STATUS_UNKNOWN = 1, /**< An undocumented error has occurred. */
  ATMI_STATUS_ERROR = 2    /**< A generic error has occurred. */
} atmi_status_t;
/**
* @brief Device Types.
*
* Each concrete device type occupies its own bit, so the combined values
* below are plain bitwise ORs of the concrete ones.
*/
typedef enum atmi_devtype_s {
  ATMI_DEVTYPE_CPU = 0x0001,  /**< CPU */
  ATMI_DEVTYPE_iGPU = 0x0010, /**< Integrated GPU */
  ATMI_DEVTYPE_dGPU = 0x0100, /**< Discrete GPU */
  /** Any GPU (integrated or discrete); equals 0x0110. */
  ATMI_DEVTYPE_GPU = ATMI_DEVTYPE_dGPU | ATMI_DEVTYPE_iGPU,
  /** Union of all device types; equals 0x111. */
  ATMI_DEVTYPE_ALL = ATMI_DEVTYPE_GPU | ATMI_DEVTYPE_CPU
} atmi_devtype_t;
/**
* @brief Memory Access Type.
*/
typedef enum atmi_memtype_s {
  ATMI_MEMTYPE_FINE_GRAINED = 0,   /**< Fine-grained memory */
  ATMI_MEMTYPE_COARSE_GRAINED = 1, /**< Coarse-grained memory */
  ATMI_MEMTYPE_ANY = 2             /**< Any memory type */
} atmi_memtype_t;
/**
* @brief ATMI Memory Fences for Tasks.
*/
typedef enum atmi_task_fence_scope_s {
  /** No memory fence is applied; external fences have to be applied around
   *  the task launch/completion. */
  ATMI_FENCE_SCOPE_NONE = 0,
  /** The fence is applied to the device. */
  ATMI_FENCE_SCOPE_DEVICE = 1,
  /** The fence is applied to the entire system. */
  ATMI_FENCE_SCOPE_SYSTEM = 2
} atmi_task_fence_scope_t;
/** @} */
/** \defgroup common Common ATMI Structures
* @{
*/
/**
* @brief ATMI Compute Place
*
* Identifies a compute location by cluster node, device type and device
* ordinal.
*/
typedef struct atmi_place_s {
/**
* The node in a cluster where computation should occur.
* Default is node_id = 0 for local computations.
*/
unsigned int node_id;
/**
* Device type, given as an atmi_devtype_t value.
* NOTE(review): this comment used to read "CPU, GPU or DSP", but no DSP
* enumerator is visible in atmi_devtype_t in this header -- confirm.
*/
atmi_devtype_t type;
/**
* The device ordinal number ordered by runtime; -1 for any
*/
int device_id;
} atmi_place_t;
/**
* @brief ATMI Memory Place
*
* Identifies a memory space/region by cluster node, device type, device
* ordinal and memory ordinal.
*/
typedef struct atmi_mem_place_s {
/**
* The node in a cluster where computation should occur.
* Default is node_id = 0 for local computations.
* NOTE(review): wording matches the compute place; presumably this is the
* node that holds the memory -- confirm.
*/
unsigned int node_id;
/**
* Device type, given as an atmi_devtype_t value.
* NOTE(review): this comment used to read "CPU, GPU or DSP", but no DSP
* enumerator is visible in atmi_devtype_t in this header -- confirm.
*/
atmi_devtype_t dev_type;
/**
* The device ordinal number ordered by runtime; -1 for any
*/
int dev_id;
// Disabled field, kept for reference:
// atmi_memtype_t mem_type; // Fine grained or Coarse grained
/**
* The memory space/region ordinal number ordered by runtime; -1 for any
*/
int mem_id;
} atmi_mem_place_t;
/**
* @brief ATMI Memory Space/region Structure
*
* Describes one memory space/region by its capacity and memory type.
*/
typedef struct atmi_memory_s {
/**
* Memory capacity.
* NOTE(review): units are not stated here -- presumably bytes; confirm.
*/
unsigned long int capacity;
/**
* Memory type, given as an atmi_memtype_t value.
*/
atmi_memtype_t type;
} atmi_memory_t;
/**
* @brief ATMI Device Structure
*
* Describes one device: its type and the memory spaces/regions it can access.
*/
typedef struct atmi_device_s {
/**
* Device type, given as an atmi_devtype_t value.
* NOTE(review): this comment used to read "CPU, GPU or DSP", but no DSP
* enumerator is visible in atmi_devtype_t in this header -- confirm.
*/
atmi_devtype_t type;
/**
* Array of memory spaces/regions that are accessible
* from this device.
* NOTE(review): the number of elements is not stored in this struct;
* confirm where callers obtain it.
*/
atmi_memory_t *memories;
} atmi_device_t;
/**
* @brief ATMI Machine Structure
*
* Both arrays below have ATMI_DEVTYPE_ALL (0x111 = 273) elements, so valid
* indices are 0 .. ATMI_DEVTYPE_ALL - 1; ATMI_DEVTYPE_ALL itself is NOT a
* valid index.
* NOTE(review): presumably the arrays are indexed by an atmi_devtype_t value
* (e.g. ATMI_DEVTYPE_CPU, ATMI_DEVTYPE_GPU) -- confirm against the runtime.
*/
typedef struct atmi_machine_s {
/**
* The number of devices categorized by the device type
*/
unsigned int device_count_by_type[ATMI_DEVTYPE_ALL];
/**
* The device structures categorized by the device type.
* NOTE(review): presumably each entry points to an array whose length is the
* matching device_count_by_type entry -- confirm.
*/
atmi_device_t *devices_by_type[ATMI_DEVTYPE_ALL];
} atmi_machine_t;
// Below are some helper macros that can be used to set up
// some of the ATMI data structures.
// Designated initializer with the fields of atmi_place_t: the given node and
// device ordinal, with the device type fixed to ATMI_DEVTYPE_CPU.
#define ATMI_PLACE_CPU(node, cpu_id) \
  {                                  \
    .node_id = node,                 \
    .type = ATMI_DEVTYPE_CPU,        \
    .device_id = cpu_id              \
  }
// Designated initializer with the fields of atmi_place_t: the given node and
// device ordinal, with the device type fixed to ATMI_DEVTYPE_GPU.
#define ATMI_PLACE_GPU(node, gpu_id) \
  {                                  \
    .node_id = node,                 \
    .type = ATMI_DEVTYPE_GPU,        \
    .device_id = gpu_id              \
  }
// Designated initializer with the fields of atmi_mem_place_t: the given node
// and CPU ordinal, device type ATMI_DEVTYPE_CPU, and mem_id = -1.
#define ATMI_MEM_PLACE_CPU(node, cpu_id) \
  {                                      \
    .node_id = node,                     \
    .dev_type = ATMI_DEVTYPE_CPU,        \
    .dev_id = cpu_id,                    \
    .mem_id = -1                         \
  }
// Designated initializer with the fields of atmi_mem_place_t: the given node
// and GPU ordinal, device type ATMI_DEVTYPE_GPU, and mem_id = -1.
#define ATMI_MEM_PLACE_GPU(node, gpu_id) \
  {                                      \
    .node_id = node,                     \
    .dev_type = ATMI_DEVTYPE_GPU,        \
    .dev_id = gpu_id,                    \
    .mem_id = -1                         \
  }
// Designated initializer with the fields of atmi_mem_place_t: the given node,
// CPU ordinal and memory ordinal, with device type ATMI_DEVTYPE_CPU.
#define ATMI_MEM_PLACE_CPU_MEM(node, cpu_id, cpu_mem_id) \
  {                                                      \
    .node_id = node,                                     \
    .dev_type = ATMI_DEVTYPE_CPU,                        \
    .dev_id = cpu_id,                                    \
    .mem_id = cpu_mem_id                                 \
  }
// Designated initializer with the fields of atmi_mem_place_t: the given node,
// GPU ordinal and memory ordinal, with device type ATMI_DEVTYPE_GPU.
#define ATMI_MEM_PLACE_GPU_MEM(node, gpu_id, gpu_mem_id) \
  {                                                      \
    .node_id = node,                                     \
    .dev_type = ATMI_DEVTYPE_GPU,                        \
    .dev_id = gpu_id,                                    \
    .mem_id = gpu_mem_id                                 \
  }
// Designated initializer with the fields of atmi_mem_place_t: the given
// device type, device ordinal and memory ordinal, with node_id fixed to 0.
#define ATMI_MEM_PLACE(d_type, d_id, m_id) \
  {                                        \
    .node_id = 0,                          \
    .dev_type = d_type,                    \
    .dev_id = d_id,                        \
    .mem_id = m_id                         \
  }
#endif // INCLUDE_ATMI_H_