; loop-guards.ll 8.44 KB
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL

; Not implemented as a MIR test so that changes to the generic HardwareLoops
; pass can also be tested. These functions have been taken from
; Transforms/HardwareLoops/loop-guards.ll, in which the generation of a few
; test.set intrinsics can be seen, but only one (ne_trip_count) gets generated
; here. Simplifications result in icmps changing and maybe also the CFG. So,
; TODO: Teach the HardwareLoops some better pattern recognition.

; CHECK-GLOBAL-NOT: DoLoopStart
; CHECK-GLOBAL-NOT: WhileLoopStart
; CHECK-GLOBAL-NOT: LoopEnd

; ne_and_guard: the zero-trip test (%N != 0) is and-ed with an unrelated
; condition (%t1 & %t2) to form the single branch that guards the loop.
; The loop copies one i32 per iteration from %b to %a and exits once the
; incremented counter equals %N.
; The FileCheck lines below expect a compare against 0 plus a conditional
; branch in the entry block, a t2DLS in the preheader and a t2LEUpdate that
; branches back to the loop block.
; CHECK: ne_and_guard
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK:   t2CMPri renamable $lr, 0
; CHECK:   tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK:   $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; Loop guard: (%t1 & %t2) & (%N != 0), all folded into one branch condition.
  %brmerge.demorgan = and i1 %t1, %t2
  %cmp6 = icmp ne i32 %N, 0
  %or.cond = and i1 %brmerge.demorgan, %cmp6
  br i1 %or.cond, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %entry
  ; Counter starts at 0; both pointers advance by one i32 per iteration.
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  ; Latch: leave the loop when the incremented counter reaches %N.
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %entry
  ret void
}

; ne_preheader: the unrelated condition (%t1 & %t2) is tested in the entry
; block, and the zero-trip test (%N != 0) lives on its own in
; while.preheader, which branches straight into the loop.
; The loop copies one i32 per iteration from %b to %a and exits once the
; incremented counter equals %N.
; The FileCheck lines below expect a compare against 0 plus a conditional
; branch, followed by t2DLS / t2LEUpdate.
; TODO: This could generate WLS
; CHECK: ne_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK:   t2CMPri renamable $lr, 0
; CHECK:   tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK:   $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; Outer condition, independent of the trip count.
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Zero-trip guard: only enter the loop when %N != 0.
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  ; Counter starts at 0; both pointers advance by one i32 per iteration.
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  ; Latch: leave the loop when the incremented counter reaches %N.
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; eq_preheader: same CFG shape as a guard placed in while.preheader, but the
; zero-trip test is written as `icmp eq %N, 0` with the branch targets
; swapped (true -> if.end, false -> while.body).
; The loop copies one i32 per iteration from %b to %a and exits once the
; incremented counter equals %N.
; The FileCheck lines below expect a compare against 0 plus a conditional
; branch, followed by t2DLS / t2LEUpdate.
; TODO: This could generate WLS
; CHECK: eq_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK:   t2CMPri renamable $lr, 0
; CHECK:   tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK:   $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; Outer condition, independent of the trip count.
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Zero-trip guard in its `eq` form: skip the loop when %N == 0.
  %cmp = icmp eq i32 %N, 0
  br i1 %cmp, label %if.end, label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  ; Counter starts at 0; both pointers advance by one i32 per iteration.
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  ; Latch: leave the loop when the incremented counter reaches %N.
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; ne_prepreheader: the zero-trip test (%N != 0) is done in the entry block,
; while the block that immediately precedes the loop (while.preheader) tests
; the unrelated condition (%t1 & %t2). The trip-count guard is therefore one
; block removed from the loop.
; The loop copies one i32 per iteration from %b to %a and exits once the
; incremented counter equals %N.
; The FileCheck lines below expect a compare against 0 plus a conditional
; branch, followed by t2DLS / t2LEUpdate.
; TODO: This could generate WLS
; CHECK: ne_prepreheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK:   t2CMPri renamable $lr, 0
; CHECK:   tBcc %bb.3
; CHECK: bb.1.while.body.preheader:
; CHECK:   $lr = t2DLS renamable $lr
; CHECK: bb.2.while.body:
; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; Zero-trip guard, evaluated before the unrelated condition.
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Second condition, independent of the trip count, directly before the loop.
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  ; Counter starts at 0; both pointers advance by one i32 per iteration.
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  ; Latch: leave the loop when the incremented counter reaches %N.
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; be_ne: the loop guard tests a value derived through a select (%be != 0)
; rather than testing %N directly. The loop copies one i32 per iteration
; from %b to %a and keeps iterating while the incremented counter is
; unsigned-less-than %N.
; The FileCheck lines below expect a t2DLS directly in the entry block and a
; t2LEUpdate branching back to bb.1.do.body.
; NOTE(review): the select yields 0 when %N != 0 and %N - 1 otherwise, so the
; loop is only entered when %N == 0. This looks unusual but may be intended;
; confirm against Transforms/HardwareLoops/loop-guards.ll before changing it.
; CHECK: be_ne
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK:   $lr = t2DLS renamable $lr
; CHECK: bb.1.do.body:
; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.1
define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; %be = (%N != 0) ? 0 : (%N - 1); the loop is guarded by %be != 0.
  %cmp = icmp ne i32 %N, 0
  %sub = sub i32 %N, 1
  %be = select i1 %cmp, i32 0, i32 %sub
  %cmp.1 = icmp ne i32 %be, 0
  br i1 %cmp.1, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %entry
  ; Counter starts at 0; both pointers advance by one i32 per iteration.
  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ]
  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ]
  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
  %tmp = load i32, i32* %b.addr.0, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
  store i32 %tmp, i32* %a.addr.0, align 4
  %inc = add nuw i32 %i.0, 1
  ; Latch: continue while the incremented counter is unsigned-less-than %N.
  %cmp.2 = icmp ult i32 %inc, %N
  br i1 %cmp.2, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %entry
  ret void
}

; ne_trip_count: the entry block branches unconditionally to
; do.body.preheader, whose only job is the zero-trip test (%N != 0) that
; guards the loop. The loop copies one i32 per iteration from %b to %a and
; keeps iterating while the incremented counter is unsigned-less-than %N.
; The %t1 argument is not used by the body.
; The FileCheck lines below expect a t2WLS in the entry block that targets
; bb.3, a tMOVr into $lr in the preheader, and a t2LEUpdate branching back
; to the loop block.
; TODO: Remove the tMOVr in the preheader!
; CHECK: ne_trip_count
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK:   $lr = t2WLS $r3, %bb.3
; CHECK: bb.1.do.body.preheader:
; CHECK:   $lr = tMOVr
; CHECK: bb.2.do.body:
; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  br label %do.body.preheader

do.body.preheader:                                ; preds = %entry
  ; Zero-trip guard: only enter the loop when %N != 0.
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %do.body.preheader
  ; Counter starts at 0; both pointers advance by one i32 per iteration.
  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ]
  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ]
  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
  %tmp = load i32, i32* %b.addr.0, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
  store i32 %tmp, i32* %a.addr.0, align 4
  %inc = add nuw i32 %i.0, 1
  ; Latch: continue while the incremented counter is unsigned-less-than %N.
  %cmp.1 = icmp ult i32 %inc, %N
  br i1 %cmp.1, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %do.body.preheader
  ret void
}