; Source file: llvm.amdgcn.rsq.clamp.ll (5.91 KB)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s

; Baseline f32 case: feeds %src directly to llvm.amdgcn.rsq.clamp.f32 and
; returns the result.
; Expected output for tahiti is a single v_rsq_clamp_f32 instruction.
; Expected output for tonga is v_rsq_f32 followed by a v_min_f32 / v_max_f32
; pair against 0x7f7fffff and 0xff7fffff (the bit patterns of +/- the largest
; finite f32).
define float @v_rsq_clamp_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; f32 case with an llvm.fabs.f32 applied to the operand before the
; rsq.clamp intrinsic.
; On both targets the expected output has no separate absolute-value
; instruction: the |v0| source modifier appears on the e64 encoding of the
; rsq instruction instead. The tonga output keeps the same v_min_f32 /
; v_max_f32 clamp pair as the plain f32 case.
define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e64 v0, |v0|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e64 v0, |v0|
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
  ret float %rsq_clamp
}

; Baseline f64 case: feeds %src directly to llvm.amdgcn.rsq.clamp.f64 and
; returns the result.
; Expected output for tahiti is a single v_rsq_clamp_f64 instruction.
; Expected output for tonga is v_rsq_f64 followed by v_min_f64 / v_max_f64
; against 64-bit constants built in s[4:5]: s4 is set to -1 once and shared
; as the low half, while s5 is set to 0x7fefffff for the min and then
; rewritten to 0xffefffff for the max (together the bit patterns of +/- the
; largest finite f64).
define double @v_rsq_clamp_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; f64 case with an llvm.fabs.f64 applied to the operand before the
; rsq.clamp intrinsic.
; On both targets the expected output carries the absolute value as the
; |v[0:1]| source modifier on the e64 encoding of the rsq instruction. The
; tonga output then applies the same s[4:5]-based v_min_f64 / v_max_f64
; clamp sequence as the plain f64 case.
define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e64 v[0:1], |v[0:1]|
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
  ret double %rsq_clamp
}

; f32 case where the intrinsic operand is undef rather than a function
; argument.
; The expected output still contains the rsq instruction on both targets;
; its source is s4, which is not written anywhere in the expected sequence.
; The tonga output keeps the usual v_min_f32 / v_max_f32 clamp pair.
define float @v_rsq_clamp_undef_f32() #0 {
; SI-LABEL: v_rsq_clamp_undef_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, s4
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, s4
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
  ret float %rsq_clamp
}

; f64 case where the intrinsic operand is undef rather than a function
; argument.
; The expected output still contains the rsq instruction on both targets,
; reading s[4:5] before anything in the expected sequence has written it.
; In the tonga output the same s[4:5] pair is loaded with the clamp
; constants only after the v_rsq_f64 has been issued.
define double @v_rsq_clamp_undef_f64() #0 {
; SI-LABEL: v_rsq_clamp_undef_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], s[4:5]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
  ret double %rsq_clamp
}

; Same IR body as v_rsq_clamp_f32, but the function carries attribute group
; #2 ("amdgpu-ieee"="false") instead of #0.
; The expected instruction sequences for both targets match those of
; v_rsq_clamp_f32 line for line, i.e. the attribute does not change the
; checked output for this intrinsic.
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; Same IR body as v_rsq_clamp_f64, but the function carries attribute group
; #2 ("amdgpu-ieee"="false") instead of #0.
; The expected instruction sequences for both targets match those of
; v_rsq_clamp_f64 line for line, including the s[4:5] constant setup and
; the v_min_f64 / v_max_f64 pair in the tonga output.
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; Intrinsics called by the test functions in this file: the generic fabs and
; the AMDGPU rsq.clamp, each in an f32 and an f64 overload. All four share
; attribute group #1.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1

; Attribute groups referenced in this file:
;   #0 - the test functions without the _non_ieee suffix.
;   #1 - the intrinsic declarations.
;   #2 - the *_non_ieee test functions; same as #0 plus "amdgpu-ieee"="false".
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-ieee"="false" }