sync.cu
5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
//===------------ sync.cu - GPU OpenMP synchronizations ---------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements the device-side synchronization entry points: ordered, barrier,
// master, single, flush, warp vote and warp sync.
//
//===----------------------------------------------------------------------===//
#include "common/omptarget.h"
#include "target_impl.h"
////////////////////////////////////////////////////////////////////////////////
// KMP Ordered calls
////////////////////////////////////////////////////////////////////////////////
// Entry point for the start of an `ordered` region.
//
// The body only emits a debug trace through PRINT0; neither `loc` nor `tid`
// is read and no synchronization is performed here.
EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
  (void)loc; // intentionally unused
  (void)tid; // intentionally unused
  PRINT0(LD_IO, "call kmpc_ordered\n");
}
// Entry point for the end of an `ordered` region.
//
// The body only emits a debug trace through PRINT0; neither `loc` nor `tid`
// is read and no synchronization is performed here.
EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
  (void)loc; // intentionally unused
  (void)tid; // intentionally unused
  PRINT0(LD_IO, "call kmpc_end_ordered\n");
}
////////////////////////////////////////////////////////////////////////////////
// KMP Barriers
////////////////////////////////////////////////////////////////////////////////
// A team is a block, so we can use the CUDA native synchronization mechanism.
// FIXME: what if not all threads (warps) participate in the barrier?
// We may need to implement it differently.
// Cancellation-aware barrier entry point.
//
// Forwards both arguments unchanged to __kmpc_barrier and then returns 0
// unconditionally; this implementation never reports anything else.
//
// loc_ref: source location descriptor, passed through to __kmpc_barrier.
// tid:     caller-supplied thread id, passed through to __kmpc_barrier.
EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
  PRINT0(LD_IO, "call kmpc_cancel_barrier\n");

  // All of the actual synchronization is delegated to the regular barrier.
  __kmpc_barrier(loc_ref, tid);

  PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
  return 0;
}
// Team-wide barrier entry point.
//
// Behavior, in order of the checks performed:
//  * Runtime uninitialized (checkRuntimeUninitialized(loc_ref) is true):
//    asserts SPMD mode and issues the simple SPMD barrier with the caller's
//    `tid`. No completion trace is printed on this path.
//  * Otherwise `tid` is recomputed with GetLogicalThreadIdInBlock and the
//    number of OpenMP threads is queried with GetNumberOfOmpThreads:
//      - at most one thread: only a flush is issued;
//      - more than one thread in SPMD mode: the simple SPMD barrier is used;
//      - more than one thread otherwise: a named sync is issued over the
//        thread count rounded up to a multiple of WARPSIZE.
//
// loc_ref: source location descriptor, consulted by the mode checks.
// tid:     caller-supplied thread id; overwritten when the runtime is
//          initialized.
EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
  // Guard clause: without an initialized runtime only SPMD mode is expected.
  if (checkRuntimeUninitialized(loc_ref)) {
    ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
            "Expected SPMD mode with uninitialized runtime.");
    __kmpc_barrier_simple_spmd(loc_ref, tid);
    return;
  }

  tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
  int ompThreadCount = GetNumberOfOmpThreads(checkSPMDMode(loc_ref));

  if (ompThreadCount <= 1) {
    // Nothing to wait for, but the memory flush is still required per the
    // standard.
    __kmpc_flush(loc_ref);
  } else if (checkSPMDMode(loc_ref)) {
    __kmpc_barrier_simple_spmd(loc_ref, tid);
  } else {
    // The thread count handed to the named sync must be rounded up to a
    // multiple of WARPSIZE.
    int syncThreadCount =
        WARPSIZE * ((ompThreadCount + WARPSIZE - 1) / WARPSIZE);
    PRINT(LD_SYNC,
          "call kmpc_barrier with %d omp threads, sync parameter %d\n",
          (int)ompThreadCount, (int)syncThreadCount);
    __kmpc_impl_named_sync(syncThreadCount);
  }

  PRINT0(LD_SYNC, "completed kmpc_barrier\n");
}
// Simple barrier for SPMD mode.
//
// Precondition (from the original contract): the caller is in an L0 parallel
// region and all worker threads participate.
//
// The barrier itself is a single __kmpc_impl_syncthreads() call bracketed by
// debug traces; `loc_ref` and `tid` are not read.
EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
  (void)loc_ref; // intentionally unused
  (void)tid;     // intentionally unused

  PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
  __kmpc_impl_syncthreads();
  PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
}
////////////////////////////////////////////////////////////////////////////////
// KMP MASTER
////////////////////////////////////////////////////////////////////////////////
// Entry point for the start of a `master` region.
//
// Returns the result of IsTeamMaster(global_tid) converted to int32_t.
//
// loc:        source location descriptor; not read here.
// global_tid: thread id forwarded to IsTeamMaster.
EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
  (void)loc; // intentionally unused
  PRINT0(LD_IO, "call kmpc_master\n");

  const int32_t isMaster = IsTeamMaster(global_tid);
  return isMaster;
}
// Entry point for the end of a `master` region.
//
// Emits a debug trace and asserts, at the LT_FUSSY level, that
// IsTeamMaster(global_tid) holds for the calling thread.
//
// loc:        source location descriptor; not read here.
// global_tid: thread id checked by the assertion.
EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
  (void)loc; // intentionally unused
  PRINT0(LD_IO, "call kmpc_end_master\n");

  // Only the thread that entered the region is expected to reach its end.
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
////////////////////////////////////////////////////////////////////////////////
// KMP SINGLE
////////////////////////////////////////////////////////////////////////////////
// Entry point for the start of a `single` region.
//
// `single` is implemented in terms of `master`: the return value is the
// result of IsTeamMaster(global_tid), so the team master gets the region.
//
// loc:        source location descriptor; not read here.
// global_tid: thread id forwarded to IsTeamMaster.
EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
  (void)loc; // intentionally unused
  PRINT0(LD_IO, "call kmpc_single\n");

  const int32_t ownsSingle = IsTeamMaster(global_tid);
  return ownsSingle;
}
// Entry point for the end of a `single` region.
//
// Because `single` is implemented with `master`, this asserts (at the
// LT_FUSSY level) that IsTeamMaster(global_tid) holds. No barrier is issued
// here; the original note states that the synchronizing barrier is called
// explicitly elsewhere.
//
// loc:        source location descriptor; not read here.
// global_tid: thread id checked by the assertion.
EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
  (void)loc; // intentionally unused
  PRINT0(LD_IO, "call kmpc_end_single\n");

  // The master took the single region, so only the master should end it.
  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
////////////////////////////////////////////////////////////////////////////////
// Flush
////////////////////////////////////////////////////////////////////////////////
// Entry point for the `flush` construct.
//
// Emits a debug trace and then issues the target's thread fence through
// __kmpc_impl_threadfence(). `loc` is not read.
EXTERN void __kmpc_flush(kmp_Ident *loc) {
  (void)loc; // intentionally unused
  PRINT0(LD_IO, "call kmpc_flush\n");
  __kmpc_impl_threadfence();
}
////////////////////////////////////////////////////////////////////////////////
// Vote
////////////////////////////////////////////////////////////////////////////////
// Returns the lane mask reported by __kmpc_impl_activemask() at the point of
// the call, after emitting a debug trace.
EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
  PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");

  const __kmpc_impl_lanemask_t activeLanes = __kmpc_impl_activemask();
  return activeLanes;
}
////////////////////////////////////////////////////////////////////////////////
// Syncwarp
////////////////////////////////////////////////////////////////////////////////
// Warp-level synchronization entry point.
//
// Emits a debug trace and forwards `Mask` unchanged to
// __kmpc_impl_syncwarp().
//
// Mask: lane mask handed to the target implementation.
EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
  PRINT0(LD_IO, "call __kmpc_syncwarp\n");
  __kmpc_impl_syncwarp(Mask);
}