stack-realign-kernel.ll 13.2 KB
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900  < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.

define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s4, s4, s7
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel max_alignment_128
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 8
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: max_alignment_128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 128, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
  ret void
}

define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s4, s4, s7
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel stackrealign_attr
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 8
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: stackrealign_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s4, s4, s7
; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel alignstack_attr
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 8
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: alignstack_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }