divergent-branch-uniform-condition.ll
5.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: The checks for opt are NOT added by the update script. Those
; checks are looking for the absence of specific metadata, which
; cannot be expressed reliably by the generated checks.
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=ISA
; RUN: opt --amdgpu-annotate-uniform -S %s | FileCheck %s -check-prefix=UNIFORM
; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s | FileCheck %s -check-prefix=CONTROLFLOW
; This module creates a divergent branch in block Flow2. The branch is
; marked as divergent by the divergence analysis, but its condition is
; not. This test ensures that the divergence of the branch itself is
; tested, not that of its condition, so that the branch is correctly
; emitted as divergent.
target triple = "amdgcn-mesa-mesa3d"
; Entry point using the amdgpu_ps calling convention. %1 is the first operand
; and %0 the last operand of the interpolation intrinsic called in block start.
; The body is a counted loop followed by a conditional sqrt and an export.
define amdgpu_ps void @main(i32 %0, float %1) {
; Expected llc output for gfx900 follows. These lines are autogenerated (see
; the NOTE at the top of the file); regenerate them rather than editing by hand.
; ISA-LABEL: main:
; ISA: ; %bb.0: ; %start
; ISA-NEXT: v_readfirstlane_b32 s0, v0
; ISA-NEXT: s_mov_b32 m0, s0
; ISA-NEXT: s_mov_b32 s0, 0
; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
; ISA-NEXT: s_mov_b64 s[2:3], 0
; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
; ISA-NEXT: s_branch BB0_3
; ISA-NEXT: BB0_1: ; %Flow1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_or_b64 exec, exec, s[8:9]
; ISA-NEXT: s_add_i32 s0, s0, 1
; ISA-NEXT: s_mov_b64 s[8:9], 0
; ISA-NEXT: BB0_2: ; %Flow
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_and_b64 s[10:11], exec, s[6:7]
; ISA-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3]
; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; ISA-NEXT: s_and_b64 s[8:9], s[8:9], exec
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; ISA-NEXT: s_andn2_b64 exec, exec, s[2:3]
; ISA-NEXT: s_cbranch_execz BB0_6
; ISA-NEXT: BB0_3: ; %loop
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
; ISA-NEXT: s_or_b64 s[6:7], s[6:7], exec
; ISA-NEXT: s_cmp_lt_u32 s0, 32
; ISA-NEXT: s_mov_b64 s[8:9], -1
; ISA-NEXT: s_cbranch_scc0 BB0_2
; ISA-NEXT: ; %bb.4: ; %endif1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_mov_b64 s[6:7], -1
; ISA-NEXT: s_and_saveexec_b64 s[8:9], vcc
; ISA-NEXT: s_cbranch_execz BB0_1
; ISA-NEXT: ; %bb.5: ; %endif2
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
; ISA-NEXT: s_branch BB0_1
; ISA-NEXT: BB0_6: ; %Flow2
; ISA-NEXT: s_or_b64 exec, exec, s[2:3]
; ISA-NEXT: v_mov_b32_e32 v1, 0
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
; ISA-NEXT: ; %bb.7: ; %if1
; ISA-NEXT: v_sqrt_f32_e32 v1, v0
; ISA-NEXT: ; %bb.8: ; %endloop
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; ISA-NEXT: s_endpgm
; Entry block: compute the interpolated value %v0, which endif1 compares
; against zero and if1 later passes to sqrt.
start:
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
br label %loop
; Loop header. %v1 is the iteration counter: 0 on entry, %6 on the back edge.
; Control goes to endif1 while %v1 <= 31 (unsigned) and to Flow otherwise.
loop: ; preds = %Flow, %start
%v1 = phi i32 [ 0, %start ], [ %6, %Flow ]
%v2 = icmp ugt i32 %v1, 31
%2 = xor i1 %v2, true
br i1 %2, label %endif1, label %Flow
; Join of endif1 and endif2. %3 is the incremented counter when coming from
; endif2 (undef otherwise); %4 is true only when arriving directly from endif1.
Flow1: ; preds = %endif2, %endif1
%3 = phi i32 [ %v5, %endif2 ], [ undef, %endif1 ]
%4 = phi i1 [ false, %endif2 ], [ true, %endif1 ]
br label %Flow
; Hand-written checks on the opt output for block Flow2 (not produced by the
; update script). When only the uniform-annotation pass runs, the branch must
; not be tagged with !amdgpu.uniform metadata.
; UNIFORM-LABEL: Flow2:
; UNIFORM-NEXT: br i1 %8, label %if1, label %endloop
; UNIFORM-NOT: !amdgpu.uniform
; UNIFORM: if1:
; When control-flow annotation also runs, the branch must be rewritten to go
; through llvm.amdgcn.if, whose first result becomes the branch condition.
; CONTROLFLOW-LABEL: Flow2:
; CONTROLFLOW-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %{{.*}})
; CONTROLFLOW-NEXT: [[IF:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %{{.*}})
; CONTROLFLOW-NEXT: [[COND:%.*]] = extractvalue { i1, i64 } [[IF]], 0
; CONTROLFLOW-NEXT: %{{.*}} = extractvalue { i1, i64 } [[IF]], 1
; CONTROLFLOW-NEXT: br i1 [[COND]], label %if1, label %endloop
; Loop exit block holding the branch under test. Its condition %8 is a phi of
; constants defined in Flow.
Flow2: ; preds = %Flow
br i1 %8, label %if1, label %endloop
; Reached only when %8 is true: computes the square root of %v0.
if1: ; preds = %Flow2
%v3 = call float @llvm.sqrt.f32(float %v0)
br label %endloop
; Reached while the counter is in range. Goes to endif2 unless %v0 > 0.0
; (ordered compare), in which case it goes straight to Flow1.
endif1: ; preds = %loop
%v4 = fcmp ogt float %v0, 0.000000e+00
%5 = xor i1 %v4, true
br i1 %5, label %endif2, label %Flow1
; Loop latch. %6 carries the next counter value, %7 selects between leaving the
; loop (Flow2) and iterating again (loop), and %8 is true when this block was
; reached directly from the loop header and false when reached via Flow1.
Flow: ; preds = %Flow1, %loop
%6 = phi i32 [ %3, %Flow1 ], [ undef, %loop ]
%7 = phi i1 [ %4, %Flow1 ], [ true, %loop ]
%8 = phi i1 [ false, %Flow1 ], [ true, %loop ]
br i1 %7, label %Flow2, label %loop
; Increment the loop counter.
endif2: ; preds = %endif1
%v5 = add i32 %v1, 1
br label %Flow1
; Merge the result (0.0 when if1 was skipped, the sqrt value otherwise) and
; pass it as all four float operands of the export intrinsic.
endloop: ; preds = %if1, %Flow2
%v6 = phi float [ 0.000000e+00, %Flow2 ], [ %v3, %if1 ]
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
ret void
}
; Intrinsics called by @main: sqrt in block if1, interp.p1 in block start and
; exp in block endloop.
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.sqrt.f32(float) #0
; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1
; Function Attrs: inaccessiblememonly nounwind writeonly
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2
; Attribute groups referenced by the declarations above (#0, #1 and #2).
attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { inaccessiblememonly nounwind writeonly }