; mi-peephole-splat.ll 5.17 KB
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \
; RUN: | FileCheck --check-prefix=CHECK-LE %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \
; RUN: | FileCheck --check-prefix=CHECK-BE %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s \
; RUN: | FileCheck --check-prefix=CHECK-P9LE %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr9 < %s \
; RUN: | FileCheck --check-prefix=CHECK-P9BE %s

; @splat_swap: adds the two <2 x double> arguments, applies the v2f64 rint
; intrinsic to the sum, and returns (element 0 - element 1) of the result.
;
; In the expected output for all four RUN configurations the vector intrinsic
; shows up as two `bl rint` calls, with an xxswapd of the sum emitted before
; the second call and an xxmrghd/xxswapd pair feeding the final xssubdp.
; NOTE(review): judging by the file name, this presumably guards the PowerPC
; MI peephole handling of splat/swap patterns -- confirm against the commit
; that introduced the test.
;
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the script rather than editing them by hand.
define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind  {
; CHECK-LE-LABEL: splat_swap:
; CHECK-LE:       # %bb.0:
; CHECK-LE-NEXT:    mflr 0
; CHECK-LE-NEXT:    std 0, 16(1)
; CHECK-LE-NEXT:    stdu 1, -80(1)
; CHECK-LE-NEXT:    li 3, 64
; CHECK-LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
; CHECK-LE-NEXT:    xvadddp 63, 34, 35
; CHECK-LE-NEXT:    xxlor 1, 63, 63
; CHECK-LE-NEXT:    bl rint
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    xxswapd 0, 63
; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-LE-NEXT:    li 3, 48
; CHECK-LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
; CHECK-LE-NEXT:    fmr 1, 0
; CHECK-LE-NEXT:    bl rint
; CHECK-LE-NEXT:    nop
; CHECK-LE-NEXT:    li 3, 48
; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
; CHECK-LE-NEXT:    li 3, 64
; CHECK-LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-LE-NEXT:    xxmrghd 0, 0, 1
; CHECK-LE-NEXT:    xxswapd 1, 0
; CHECK-LE-NEXT:    xssubdp 1, 1, 0
; CHECK-LE-NEXT:    addi 1, 1, 80
; CHECK-LE-NEXT:    ld 0, 16(1)
; CHECK-LE-NEXT:    mtlr 0
; CHECK-LE-NEXT:    blr
;
; CHECK-BE-LABEL: splat_swap:
; CHECK-BE:       # %bb.0:
; CHECK-BE-NEXT:    mflr 0
; CHECK-BE-NEXT:    std 0, 16(1)
; CHECK-BE-NEXT:    stdu 1, -160(1)
; CHECK-BE-NEXT:    li 3, 144
; CHECK-BE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
; CHECK-BE-NEXT:    xvadddp 63, 34, 35
; CHECK-BE-NEXT:    xxlor 1, 63, 63
; CHECK-BE-NEXT:    bl rint
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-BE-NEXT:    li 3, 128
; CHECK-BE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
; CHECK-BE-NEXT:    xxswapd 1, 63
; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; CHECK-BE-NEXT:    bl rint
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    li 3, 128
; CHECK-BE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-BE-NEXT:    li 3, 144
; CHECK-BE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
; CHECK-BE-NEXT:    xxmrghd 0, 0, 1
; CHECK-BE-NEXT:    xxswapd 1, 0
; CHECK-BE-NEXT:    xssubdp 1, 0, 1
; CHECK-BE-NEXT:    addi 1, 1, 160
; CHECK-BE-NEXT:    ld 0, 16(1)
; CHECK-BE-NEXT:    mtlr 0
; CHECK-BE-NEXT:    blr
;
; CHECK-P9LE-LABEL: splat_swap:
; CHECK-P9LE:       # %bb.0:
; CHECK-P9LE-NEXT:    mflr 0
; CHECK-P9LE-NEXT:    std 0, 16(1)
; CHECK-P9LE-NEXT:    stdu 1, -64(1)
; CHECK-P9LE-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
; CHECK-P9LE-NEXT:    xvadddp 63, 34, 35
; CHECK-P9LE-NEXT:    xscpsgndp 1, 63, 63
; CHECK-P9LE-NEXT:    bl rint
; CHECK-P9LE-NEXT:    nop
; CHECK-P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9LE-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
; CHECK-P9LE-NEXT:    xxswapd 1, 63
; CHECK-P9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; CHECK-P9LE-NEXT:    bl rint
; CHECK-P9LE-NEXT:    nop
; CHECK-P9LE-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
; CHECK-P9LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9LE-NEXT:    xxmrghd 0, 0, 1
; CHECK-P9LE-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
; CHECK-P9LE-NEXT:    xxswapd 1, 0
; CHECK-P9LE-NEXT:    xssubdp 1, 1, 0
; CHECK-P9LE-NEXT:    addi 1, 1, 64
; CHECK-P9LE-NEXT:    ld 0, 16(1)
; CHECK-P9LE-NEXT:    mtlr 0
; CHECK-P9LE-NEXT:    blr
;
; CHECK-P9BE-LABEL: splat_swap:
; CHECK-P9BE:       # %bb.0:
; CHECK-P9BE-NEXT:    mflr 0
; CHECK-P9BE-NEXT:    std 0, 16(1)
; CHECK-P9BE-NEXT:    stdu 1, -144(1)
; CHECK-P9BE-NEXT:    stxv 63, 128(1) # 16-byte Folded Spill
; CHECK-P9BE-NEXT:    xvadddp 63, 34, 35
; CHECK-P9BE-NEXT:    xscpsgndp 1, 63, 63
; CHECK-P9BE-NEXT:    bl rint
; CHECK-P9BE-NEXT:    nop
; CHECK-P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9BE-NEXT:    stxv 1, 112(1) # 16-byte Folded Spill
; CHECK-P9BE-NEXT:    xxswapd 1, 63
; CHECK-P9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; CHECK-P9BE-NEXT:    bl rint
; CHECK-P9BE-NEXT:    nop
; CHECK-P9BE-NEXT:    lxv 0, 112(1) # 16-byte Folded Reload
; CHECK-P9BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9BE-NEXT:    xxmrghd 0, 0, 1
; CHECK-P9BE-NEXT:    lxv 63, 128(1) # 16-byte Folded Reload
; CHECK-P9BE-NEXT:    xxswapd 1, 0
; CHECK-P9BE-NEXT:    xssubdp 1, 0, 1
; CHECK-P9BE-NEXT:    addi 1, 1, 144
; CHECK-P9BE-NEXT:    ld 0, 16(1)
; CHECK-P9BE-NEXT:    mtlr 0
; CHECK-P9BE-NEXT:    blr
  ; Element-wise sum of the two vector arguments.
  %added = fadd <2 x double> %x, %y
  ; Apply the v2f64 rint intrinsic to the whole vector in one IR call; the
  ; expected assembly above shows it as two separate `bl rint` calls.
  %call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone
  ; Pull both lanes back out of the result vector.
  %res1 = extractelement <2 x double> %call, i32 0
  %res2 = extractelement <2 x double> %call, i32 1
  ; Scalar result: element 0 minus element 1.
  %ret = fsub double %res1, %res2
  ret double %ret
}

; Declaration of the <2 x double> overload of the llvm.rint intrinsic that
; @splat_swap calls.
declare <2 x double> @llvm.rint.v2f64(<2 x double>)