; bswap.ll
; SLP vectorizer test for llvm.bswap calls on AMDGPU targets.
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer %s | FileCheck -check-prefixes=GCN,GFX8 %s
; GCN-LABEL: @bswap_v2i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX7: call i16 @llvm.bswap.i16(
; GFX8: call <2 x i16> @llvm.bswap.v2i16(
define <2 x i16> @bswap_v2i16(<2 x i16> %arg) {
; Byte-swaps each i16 lane of %arg with its own scalar llvm.bswap.i16 call and
; reassembles the two swapped lanes into a <2 x i16> result.
; The FileCheck lines preceding this function expect the two scalar calls to
; remain on the hawaii run and a single llvm.bswap.v2i16 call on the fiji and
; gfx900 runs.
entry:
  ; Lane 0 - extract, swap, and seed the result vector.
  %lane0 = extractelement <2 x i16> %arg, i64 0
  %swapped0 = tail call i16 @llvm.bswap.i16(i16 %lane0)
  %partial = insertelement <2 x i16> undef, i16 %swapped0, i64 0
  ; Lane 1 - extract, swap, and complete the result vector.
  %lane1 = extractelement <2 x i16> %arg, i64 1
  %swapped1 = tail call i16 @llvm.bswap.i16(i16 %lane1)
  %result = insertelement <2 x i16> %partial, i16 %swapped1, i64 1
  ret <2 x i16> %result
}
; GCN-LABEL: @bswap_v2i32(
; GCN: call i32 @llvm.bswap.i32
; GCN: call i32 @llvm.bswap.i32
define <2 x i32> @bswap_v2i32(<2 x i32> %arg) {
; Byte-swaps each i32 lane of %arg with its own scalar llvm.bswap.i32 call and
; reassembles the two swapped lanes into a <2 x i32> result.
; The FileCheck lines preceding this function expect two scalar
; llvm.bswap.i32 calls in the output of every run.
entry:
  ; Lane 0 - extract, swap, and seed the result vector.
  %lane0 = extractelement <2 x i32> %arg, i64 0
  %swapped0 = tail call i32 @llvm.bswap.i32(i32 %lane0)
  %partial = insertelement <2 x i32> undef, i32 %swapped0, i64 0
  ; Lane 1 - extract, swap, and complete the result vector.
  %lane1 = extractelement <2 x i32> %arg, i64 1
  %swapped1 = tail call i32 @llvm.bswap.i32(i32 %lane1)
  %result = insertelement <2 x i32> %partial, i32 %swapped1, i64 1
  ret <2 x i32> %result
}
; Scalar byte-swap intrinsics called by @bswap_v2i16 and @bswap_v2i32.
declare i16 @llvm.bswap.i16(i16) #0
declare i32 @llvm.bswap.i32(i32) #0
; Attribute group #0, attached to both intrinsic declarations above.
attributes #0 = { nounwind readnone speculatable willreturn }