machine-cse-commute-target-flags.mir
4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=machine-cse -o - %s | FileCheck %s
--- |
define void @commute_instruction_subreg_target_flag() { ret void }
define void @commute_target_flag_frame_index() { ret void }
define void @commute_target_flag_global() { ret void }
define void @commute_target_flag_global_offset() { ret void }
define void @commute_target_flag_global_offset_mismatch() { ret void }
declare void @func()
@gv = external addrspace(1) global i32
...
# Make sure the subreg index is cleared when commuting a register and immediate.
---
name: commute_instruction_subreg_target_flag
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: commute_instruction_subreg_target_flag
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]].sub1, 64, 0, implicit $exec
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]]
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = V_ADD_U32_e64 %0.sub1, 64, 0, implicit $exec
%2:vgpr_32 = V_ADD_U32_e64 64, %0.sub1, 0, implicit $exec
S_ENDPGM 0, implicit %1, implicit %2
...
# FIXME: Why doesn't this CSE?
---
name: commute_target_flag_frame_index
tracksRegLiveness: true
stack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: commute_target_flag_frame_index
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, [[COPY]].sub0, 0, implicit $exec
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]]
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = V_ADD_U32_e64 %0.sub0, %stack.0, 0, implicit $exec
%2:vgpr_32 = V_ADD_U32_e64 %stack.0, %0.sub0, 0, implicit $exec
S_ENDPGM 0, implicit %1, implicit %2
...
# FIXME: Handle commuting global variables
---
name: commute_target_flag_global
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: commute_target_flag_global
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
%0:sreg_64 = COPY $sgpr0_sgpr1
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc
S_ENDPGM 0, implicit %1, implicit %2
...
---
name: commute_target_flag_global_offset
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: commute_target_flag_global_offset
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
%0:sreg_64 = COPY $sgpr0_sgpr1
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 4, %0.sub0, implicit-def dead $scc
S_ENDPGM 0, implicit %1, implicit %2
...
---
name: commute_target_flag_global_offset_mismatch
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: commute_target_flag_global_offset_mismatch
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, [[COPY]].sub0, implicit-def dead $scc
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
%0:sreg_64 = COPY $sgpr0_sgpr1
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, %0.sub0, implicit-def dead $scc
S_ENDPGM 0, implicit %1, implicit %2
...