; pr44140.ll
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 | FileCheck %s
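; Assuming this file lives at llvm/test/CodeGen/X86/pr44140.ll in an LLVM
; checkout (the path is an assumption, not stated in the file), the RUN line
; above is what lit executes; from a build directory the test can also be run
; standalone with, e.g.:
;   bin/llvm-lit /path/to/llvm/test/CodeGen/X86/pr44140.ll
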
define win64cc void @opaque() {
; CHECK-LABEL: opaque:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  ret void
}

; We need xmm6 to be live from the loop header across all iterations of the
; loop. We must not clobber ymm6 inside the loop: ymm6 aliases xmm6, so any
; write to ymm6 would destroy the value the loop keeps in xmm6. (A standalone
; sketch of the underlying calling-convention property follows @main below.)
define i32 @main() {
; CHECK-LABEL: main:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    subq $584, %rsp # imm = 0x248
; CHECK-NEXT:    .cfi_def_cfa_offset 592
; CHECK-NEXT:    vmovaps {{.*#+}} xmm6 = [1010101010101010101,2020202020202020202]
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB1_1: # %fake-loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm7
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm2
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm3
; CHECK-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT:    vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm5
; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm4
; CHECK-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovups %ymm4, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq opaque
; CHECK-NEXT:    vmovaps %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    testb %sil, %sil
; CHECK-NEXT:    jne .LBB1_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    movabsq $1010101010101010101, %rcx # imm = 0xE04998456557EB5
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    decl %eax
; CHECK-NEXT:    addq $584, %rsp # imm = 0x248
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
start:
  %dummy0 = alloca [22 x i64], align 8
  %dummy1 = alloca [22 x i64], align 8
  %dummy2 = alloca [22 x i64], align 8
  %data = alloca <2 x i64>, align 8
  br label %fake-loop

fake-loop:                                        ; preds = %fake-loop, %start
  %dummy0.cast = bitcast [22 x i64]* %dummy0 to i8*
  %dummy1.cast = bitcast [22 x i64]* %dummy1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy1.cast, i8* nonnull align 8 %dummy0.cast, i64 176, i1 false)
  %dummy1.cast.copy = bitcast [22 x i64]* %dummy1 to i8*
  %dummy2.cast = bitcast [22 x i64]* %dummy2 to i8*
  call void @llvm.lifetime.start.p0i8(i64 176, i8* nonnull %dummy2.cast)
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 %dummy2.cast, i8* nonnull align 8 %dummy1.cast.copy, i64 176, i1 false)
  call win64cc void @opaque()
  store <2 x i64> <i64 1010101010101010101, i64 2020202020202020202>, <2 x i64>* %data, align 8
  %opaque-false = icmp eq i8 0, 1
  br i1 %opaque-false, label %fake-loop, label %exit

exit:                                             ; preds = %fake-loop
  %data.cast = bitcast <2 x i64>* %data to i64*
  %0 = load i64, i64* %data.cast, align 8
  %1 = icmp eq i64 %0, 1010101010101010101
  %2 = select i1 %1, i32 0, i32 -1
  ret i32 %2
}
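
; A minimal standalone sketch of the calling-convention property this test
; relies on (this helper is not part of the original test and its name is
; hypothetical): the Win64 ABI treats xmm6-xmm15 as callee-saved, so a vector
; value that is live across a win64cc call may legally stay in xmm6 with no
; spill, and any ymm write while it is live must avoid ymm6, which aliases it.
define i64 @xmm6_live_across_call_sketch(<2 x i64> %v) {
  ; %v is live across the call; with a win64cc callee the register allocator
  ; can keep it in xmm6 instead of spilling it around the call.
  call win64cc void @opaque()
  %lane0 = extractelement <2 x i64> %v, i64 0
  ret i64 %lane0
}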

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #0

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0

attributes #0 = { argmemonly nounwind }