llvm.amdgcn.fmad.ftz.ll
4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)
; GCN-LABEL: {{^}}mad_f32:
; GCN: v_ma{{[dc]}}_f32
define amdgpu_kernel void @mad_f32(
float addrspace(1)* %r,
float addrspace(1)* %a,
float addrspace(1)* %b,
float addrspace(1)* %c) {
%a.val = load float, float addrspace(1)* %a
%b.val = load float, float addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float %c.val)
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}mad_f32_imm_a:
; GCN: v_madmk_f32 {{v[0-9]+}}, {{v[0-9]+}}, 0x41000000,
define amdgpu_kernel void @mad_f32_imm_a(
float addrspace(1)* %r,
float addrspace(1)* %b,
float addrspace(1)* %c) {
%b.val = load float, float addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float 8.0, float %b.val, float %c.val)
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}mad_f32_imm_b:
; GCN: v_mov_b32_e32 [[KB:v[0-9]+]], 0x41000000
; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{[s][0-9]+}}, [[KB]]
define amdgpu_kernel void @mad_f32_imm_b(
float addrspace(1)* %r,
float addrspace(1)* %a,
float addrspace(1)* %c) {
%a.val = load float, float addrspace(1)* %a
%c.val = load float, float addrspace(1)* %c
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float 8.0, float %c.val)
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}mad_f32_imm_c:
; GCN: v_mov_b32_e32 [[C:v[0-9]+]], 0x41000000
; GCN: s_load_dword [[A:s[0-9]+]]
; GCN: s_load_dword [[B:s[0-9]+]]
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; GCN: v_mac_f32_e32 [[C]], {{s[0-9]+}}, [[VB]]{{$}}
define amdgpu_kernel void @mad_f32_imm_c(
float addrspace(1)* %r,
float addrspace(1)* %a,
float addrspace(1)* %b) {
%a.val = load float, float addrspace(1)* %a
%b.val = load float, float addrspace(1)* %b
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float 8.0)
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}mad_f32_neg_b:
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @mad_f32_neg_b(
float addrspace(1)* %r,
float addrspace(1)* %a,
float addrspace(1)* %b,
float addrspace(1)* %c) {
%a.val = load float, float addrspace(1)* %a
%b.val = load float, float addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%neg.b = fsub float -0.0, %b.val
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val)
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}mad_f32_abs_b:
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
define amdgpu_kernel void @mad_f32_abs_b(
float addrspace(1)* %r,
float addrspace(1)* %a,
float addrspace(1)* %b,
float addrspace(1)* %c) {
%a.val = load float, float addrspace(1)* %a
%b.val = load float, float addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%abs.b = call float @llvm.fabs.f32(float %b.val)
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %abs.b, float %c.val)
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}mad_f32_neg_abs_b:
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
define amdgpu_kernel void @mad_f32_neg_abs_b(
float addrspace(1)* %r,
float addrspace(1)* %a,
float addrspace(1)* %b,
float addrspace(1)* %c) {
%a.val = load float, float addrspace(1)* %a
%b.val = load float, float addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%abs.b = call float @llvm.fabs.f32(float %b.val)
%neg.abs.b = fsub float -0.0, %abs.b
%r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
store float %r.val, float addrspace(1)* %r
ret void
}
declare float @llvm.fabs.f32(float)