; vector-rotates.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
; RUN: -verify-machineinstrs -mcpu=pwr8 < %s | \
; RUN: FileCheck --check-prefix=CHECK-P8 %s
; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \
; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | \
; RUN: FileCheck --check-prefix=CHECK-P7 %s
; Per-byte rotate-left by a non-splat constant vector, spelled as
; (shl %a, N) | (lshr %a, 8 - N). Both run configurations expect the rotate
; amounts to be loaded from the constant pool and a single vrlb to be emitted.
;
; Every lane keeps both shift amounts in the range 1..7. A shift by the full
; element width (shl by 8 on i8) yields poison in LLVM IR, so a lane using
; shl 8 / lshr 0 would not be a well-defined rotate.
; NOTE(review): the expected assembly below does not encode the constant-pool
; contents, so it should still match; re-run utils/update_llc_test_checks.py
; to confirm.
define <16 x i8> @rotl_v16i8(<16 x i8> %a) {
; CHECK-P8-LABEL: rotl_v16i8:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vrlb v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: rotl_v16i8:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI0_0@toc@l
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
; CHECK-P7-NEXT: vrlb v2, v2, v3
; CHECK-P7-NEXT: blr
entry:
  ; Bits moved towards the high end of each byte.
  %hi = shl <16 x i8> %a, <i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 7, i8 7, i8 1, i8 1>
  ; Bits wrapped around to the low end; each lane is 8 minus the shl amount.
  %lo = lshr <16 x i8> %a, <i8 7, i8 7, i8 6, i8 6, i8 5, i8 5, i8 4, i8 4, i8 3, i8 3, i8 2, i8 2, i8 1, i8 1, i8 7, i8 7>
  %rot = or <16 x i8> %hi, %lo
  ret <16 x i8> %rot
}
; Per-halfword rotate-left by a non-splat constant vector, spelled as
; (shl %a, N) | (lshr %a, 16 - N). Both run configurations expect the rotate
; amounts to be loaded from the constant pool and a single vrlh to be emitted.
;
; Every lane keeps both shift amounts in the range 1..15. A shift by the full
; element width (shl by 16 on i16) yields poison in LLVM IR, so a lane using
; shl 16 / lshr 0 would not be a well-defined rotate.
; NOTE(review): the expected assembly below does not encode the constant-pool
; contents, so it should still match; re-run utils/update_llc_test_checks.py
; to confirm.
define <8 x i16> @rotl_v8i16(<8 x i16> %a) {
; CHECK-P8-LABEL: rotl_v8i16:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vrlh v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: rotl_v8i16:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI1_0@toc@l
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
; CHECK-P7-NEXT: vrlh v2, v2, v3
; CHECK-P7-NEXT: blr
entry:
  ; Bits moved towards the high end of each halfword.
  %hi = shl <8 x i16> %a, <i16 1, i16 2, i16 3, i16 5, i16 7, i16 11, i16 13, i16 15>
  ; Bits wrapped around to the low end; each lane is 16 minus the shl amount.
  %lo = lshr <8 x i16> %a, <i16 15, i16 14, i16 13, i16 11, i16 9, i16 5, i16 3, i16 1>
  %rot = or <8 x i16> %hi, %lo
  ret <8 x i16> %rot
}
; Per-word rotate-left by a non-splat constant vector (29, 19, 17, 11),
; spelled as (shl %a, N) | (lshr %a, 32 - N). Both run configurations expect
; the rotate amounts to be loaded from the constant pool and a single vrlw.
define <4 x i32> @rotl_v4i32_0(<4 x i32> %a) {
; CHECK-P8-LABEL: rotl_v4i32_0:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-P8-NEXT: lvx v3, 0, r3
; CHECK-P8-NEXT: vrlw v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: rotl_v4i32_0:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P7-NEXT: addi r3, r3, .LCPI2_0@toc@l
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
; CHECK-P7-NEXT: vrlw v2, v2, v3
; CHECK-P7-NEXT: blr
entry:
  ; High part: bits shifted left by the per-lane rotate amount.
  %hi = shl <4 x i32> %a, <i32 29, i32 19, i32 17, i32 11>
  ; Low part: the wrapped-around bits; each lane is 32 minus the shl amount.
  %lo = lshr <4 x i32> %a, <i32 3, i32 13, i32 15, i32 21>
  %rot = or <4 x i32> %hi, %lo
  ret <4 x i32> %rot
}
; Per-word rotate-left by a splat constant (23 in every lane), spelled as
; (shl %a, 23) | (lshr %a, 9). Both run configurations expect no constant-pool
; load here: the amount is built in registers from two splat immediates
; (7 - (-16) = 23 via vspltisw/vsubuwm) and then fed to vrlw.
define <4 x i32> @rotl_v4i32_1(<4 x i32> %a) {
; CHECK-P8-LABEL: rotl_v4i32_1:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: vspltisw v3, -16
; CHECK-P8-NEXT: vspltisw v4, 7
; CHECK-P8-NEXT: vsubuwm v3, v4, v3
; CHECK-P8-NEXT: vrlw v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: rotl_v4i32_1:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: vspltisw v3, -16
; CHECK-P7-NEXT: vspltisw v4, 7
; CHECK-P7-NEXT: vsubuwm v3, v4, v3
; CHECK-P7-NEXT: vrlw v2, v2, v3
; CHECK-P7-NEXT: blr
entry:
  ; High part: bits shifted left by 23 in every lane.
  %hi = shl <4 x i32> %a, <i32 23, i32 23, i32 23, i32 23>
  ; Low part: the wrapped-around bits (32 - 23 = 9).
  %lo = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %rot = or <4 x i32> %hi, %lo
  ret <4 x i32> %rot
}
; Per-doubleword rotate-left by a non-splat constant vector (41, 53), spelled
; as (shl %a, N) | (lshr %a, 64 - N).
; The pwr8 checks expect a constant-pool load (lxvd2x + xxswapd) followed by a
; single vrld. The pwr7 checks expect no vrld: the vector is spilled to the
; stack, each doubleword is shifted in GPRs (sldi / rldicl), the two partial
; vectors are reloaded and combined with xxlor.
define <2 x i64> @rotl_v2i64(<2 x i64> %a) {
; CHECK-P8-LABEL: rotl_v2i64:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT: addi r3, r3, .LCPI4_0@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd vs35, vs0
; CHECK-P8-NEXT: vrld v2, v2, v3
; CHECK-P8-NEXT: blr
;
; CHECK-P7-LABEL: rotl_v2i64:
; CHECK-P7: # %bb.0: # %entry
; CHECK-P7-NEXT: addi r3, r1, -48
; CHECK-P7-NEXT: stxvd2x vs34, 0, r3
; CHECK-P7-NEXT: ld r3, -40(r1)
; CHECK-P7-NEXT: sldi r4, r3, 53
; CHECK-P7-NEXT: rldicl r3, r3, 53, 11
; CHECK-P7-NEXT: std r4, -8(r1)
; CHECK-P7-NEXT: ld r4, -48(r1)
; CHECK-P7-NEXT: sldi r5, r4, 41
; CHECK-P7-NEXT: rldicl r4, r4, 41, 23
; CHECK-P7-NEXT: std r5, -16(r1)
; CHECK-P7-NEXT: addi r5, r1, -16
; CHECK-P7-NEXT: lxvw4x vs0, 0, r5
; CHECK-P7-NEXT: std r3, -24(r1)
; CHECK-P7-NEXT: addi r3, r1, -32
; CHECK-P7-NEXT: std r4, -32(r1)
; CHECK-P7-NEXT: lxvw4x vs1, 0, r3
; CHECK-P7-NEXT: xxlor vs34, vs0, vs1
; CHECK-P7-NEXT: blr
entry:
  ; High part: bits shifted left by the per-lane rotate amount.
  %hi = shl <2 x i64> %a, <i64 41, i64 53>
  ; Low part: the wrapped-around bits; each lane is 64 minus the shl amount.
  %lo = lshr <2 x i64> %a, <i64 23, i64 11>
  %rot = or <2 x i64> %hi, %lo
  ret <2 x i64> %rot
}