; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s

; External callee using the amdgpu_gfx calling convention. Only a declaration
; is given here, so the tests that call it expect a real call sequence
; (s_getpc_b64 / s_add_u32 / s_addc_u32 / s_swappc_b64) in the output.
declare hidden amdgpu_gfx void @external_void_func_void() #0

; Two back-to-back calls to @external_void_func_void separated by an empty,
; side-effecting inline asm statement.
;
; What the expected output pins down, on all three subtargets:
;   * v40 is spilled to the stack and then used to hold SGPR lanes for
;     s4, s5, s30, s31 and s33.
;   * The callee address is materialized once (a single s_getpc_b64 into
;     s[4:5]) and reused by both s_swappc_b64 instructions.
;   * s30/s31 are restored from v40 before the final s_setpc_b64 s[30:31].
; NOTE(review): s4/s5 being saved and restored suggests they are treated as
; callee-saved under amdgpu_gfx - confirm against the calling convention.
define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    v_writelane_b32 v40, s5, 1
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 3
; GFX9-NEXT:    v_readlane_b32 s30, v40, 2
; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    v_writelane_b32 v40, s5, 1
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_readlane_b32 s31, v40, 3
; GFX10-NEXT:    v_readlane_b32 s30, v40, 2
; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 4
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    v_writelane_b32 v40, s5, 1
; GFX11-NEXT:    s_getpc_b64 s[4:5]
; GFX11-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 2
; GFX11-NEXT:    v_writelane_b32 v40, s31, 3
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 3
; GFX11-NEXT:    v_readlane_b32 s30, v40, 2
; GFX11-NEXT:    v_readlane_b32 s5, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 4
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; First call; s_swappc_b64 overwrites s[30:31] with the return address.
  call amdgpu_gfx void @external_void_func_void()
  ; Empty asm with no operands or clobbers; it only shows up as the
  ; ASMSTART/ASMEND marker pair between the two calls.
  call void asm sideeffect "", ""() #0
  ; Second call reuses the address already held in s[4:5].
  call amdgpu_gfx void @external_void_func_void()
  ret void
}

; Leaf amdgpu_gfx function whose inline asm clobbers s[30:31] and then
; s[28:29]. There is no call, so no frame setup is expected; the assertions
; require all four SGPRs to be saved into lanes 0-3 of v0 (itself spilled to
; the stack first) before the asm and read back, in reverse order, before the
; return through s_setpc_b64 s[30:31].
;
; The hand-written "GCN" assertions that used to sit between the two asm
; statements were removed: no RUN line enables that prefix, so they were never
; checked, and the s28/s29 lane traffic they described is already covered by
; the autogenerated per-subtarget assertions below.
define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
; GFX9-LABEL: void_func_void_clobber_s28_s29:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v0, s28, 0
; GFX9-NEXT:    v_writelane_b32 v0, s29, 1
; GFX9-NEXT:    v_writelane_b32 v0, s30, 2
; GFX9-NEXT:    v_writelane_b32 v0, s31, 3
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v0, 3
; GFX9-NEXT:    v_readlane_b32 s30, v0, 2
; GFX9-NEXT:    v_readlane_b32 s29, v0, 1
; GFX9-NEXT:    v_readlane_b32 s28, v0, 0
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s28_s29:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v0, s28, 0
; GFX10-NEXT:    v_writelane_b32 v0, s29, 1
; GFX10-NEXT:    v_writelane_b32 v0, s30, 2
; GFX10-NEXT:    v_writelane_b32 v0, s31, 3
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v0, 3
; GFX10-NEXT:    v_readlane_b32 s30, v0, 2
; GFX10-NEXT:    v_readlane_b32 s29, v0, 1
; GFX10-NEXT:    v_readlane_b32 s28, v0, 0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s28_s29:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v0, s28, 0
; GFX11-NEXT:    v_writelane_b32 v0, s29, 1
; GFX11-NEXT:    v_writelane_b32 v0, s30, 2
; GFX11-NEXT:    v_writelane_b32 v0, s31, 3
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v0, 3
; GFX11-NEXT:    v_readlane_b32 s30, v0, 2
; GFX11-NEXT:    v_readlane_b32 s29, v0, 1
; GFX11-NEXT:    v_readlane_b32 s28, v0, 0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Clobbering the return-address pair forces s30/s31 to be saved and restored.
  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
  ; Clobbering s[28:29] forces the same treatment for that pair.
  call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
  ret void
}

; A value is produced in s31 by inline asm, must stay live across a call to
; @external_void_func_void, and is consumed from s31 by a second inline asm.
;
; The call itself writes s[30:31] (s_swappc_b64), so the expected output
; copies the value into s4 before the call and back into s31 afterwards.
; s4, s30, s31 and s33 are saved to lanes of v40 and restored in the epilogue.
; The %out argument is not referenced in the body.
define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s4, s31
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    s_mov_b32 s31, s4
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    s_mov_b32 s4, s31
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    s_mov_b32 s31, s4
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_mov_b32 s4, s31
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s31, s4
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The "={s31}" constraint pins the asm result to physical register s31.
  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
  call amdgpu_gfx void @external_void_func_void()
  ; The "{s31}" constraint requires the value to be back in s31 here.
  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
  ret void
}

; VGPR counterpart of the s31 test: a value is produced in v31 by inline asm,
; kept live across a call to @external_void_func_void, and consumed from v31
; afterwards.
;
; The expected output copies v31 into v41 before the call and back after it.
; v41 is stored to the stack at s33 before being used and reloaded in the
; epilogue; v40 (stored at s32 offset:4) holds the lanes for s30, s31 and s33.
; The %out argument is not referenced in the body.
; NOTE(review): the copy through v41 implies v31 is not preserved by the
; callee while v41 is - confirm against the amdgpu_gfx calling convention.
define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_mov_b32_e32 v41, v31
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_mov_b32_e32 v31, v41
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v31
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
; GFX10-NEXT:    v_mov_b32_e32 v41, v31
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_mov_b32_e32 v31, v41
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v31
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def v31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_mov_b32_e32 v41, v31
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    v_mov_b32_e32 v31, v41
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use v31
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The "={v31}" constraint pins the asm result to physical register v31.
  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
  call amdgpu_gfx void @external_void_func_void()
  ; The "{v31}" constraint requires the value to be back in v31 here.
  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
  ret void
}


; A value is produced in s33 by inline asm, kept live across a call to
; @external_void_func_void, and consumed from s33 afterwards.
;
; In the expected output the prologue saves the incoming s33 to lane 3 of v40
; and overwrites s33 with s32, so s33 is already in use by the function
; itself. The asm-defined value is copied into s4 before the call and back
; into s33 after it, and the original s33 is restored from v40 in the epilogue.
; The %out argument is not referenced in the body.
define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s33
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s4, s33
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s33
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, s33
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    s_mov_b32 s33, s4
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s33
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_s33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s33
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, s33
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s33, s4
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s33
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The "={s33}" constraint pins the asm result to physical register s33.
  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
  call amdgpu_gfx void @external_void_func_void()
  ; The "{s33}" constraint requires the value to be back in s33 here.
  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
  ret void
}

; A value is produced in s34 by inline asm, kept live across a call to
; @external_void_func_void, and consumed from s34 afterwards.
;
; The expected output copies the value into s4 before the call and back into
; s34 after it. Where the callee address is built differs per subtarget:
; gfx900 reuses s[34:35] once s34 has been copied out, gfx1010 builds the
; address in s[36:37] ahead of the asm, and gfx1100 uses s[0:1].
; The %out argument is not referenced in the body.
; NOTE(review): the copy through s4 implies s34 is not preserved by the
; callee - confirm against the amdgpu_gfx calling convention.
define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s34
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    s_mov_b32 s4, s34
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    s_mov_b32 s34, s4
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s34
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[36:37]
; GFX10-NEXT:    s_add_u32 s36, s36, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, s34
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[36:37]
; GFX10-NEXT:    s_mov_b32 s34, s4
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s34
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_s34:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s34
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, s34
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s34, s4
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s34
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The "={s34}" constraint pins the asm result to physical register s34.
  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
  call amdgpu_gfx void @external_void_func_void()
  ; The "{s34}" constraint requires the value to be back in s34 here.
  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
  ret void
}

; A value is produced in v40 by inline asm, kept live across a call to
; @external_void_func_void, and consumed from v40 afterwards.
;
; Unlike the v31 test, the expected output has no copy around the call: the
; value simply stays in v40. Because this function writes v40, the incoming
; v40 is stored to the stack at s33 and reloaded in the epilogue, and v41
; (stored at s32 offset:4) takes over as the holder of the lanes for s30, s31
; and s33.
; The %out argument is not referenced in the body.
; NOTE(review): the missing copy implies v40 is preserved by the callee -
; confirm against the amdgpu_gfx calling convention.
define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_v40:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v41, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v41, 1
; GFX9-NEXT:    v_readlane_b32 s30, v41, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v41, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_v40:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v41, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s31, v41, 1
; GFX10-NEXT:    v_readlane_b32 s30, v41, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v41, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_v40:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v41, s32 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v41, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def v40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v41, s30, 0
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v41, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use v40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    v_readlane_b32 s31, v41, 1
; GFX11-NEXT:    v_readlane_b32 s30, v41, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v41, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v41, off, s32 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The "={v40}" constraint pins the asm result to physical register v40.
  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
  call amdgpu_gfx void @external_void_func_void()
  ; The "{v40}" constraint requires the value to be in v40 here.
  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
  ret void
}

; Callee whose only work is an inline asm statement that clobbers s33. It is
; defined without a calling-convention keyword and uses attribute group #1.
; The checks expect s33 to be parked in lane 0 of v0 around the asm, and v0
; itself to be spilled to / reloaded from scratch at s32 while exec is forced
; to all lanes (s_or_saveexec with -1).
define hidden void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v0, s33, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s33, v0, 0
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v0, s33, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s33, v0, 0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v0, s33, 0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s33, v0, 0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s33}"() #0
  ret void
}

; Callee whose only work is an inline asm statement that clobbers s34. It is
; defined without a calling-convention keyword and uses attribute group #1.
; The checks expect s34 to be parked in lane 0 of v0 around the asm, and v0
; itself to be spilled to / reloaded from scratch at s32 while exec is forced
; to all lanes (s_or_saveexec with -1).
define hidden void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    v_writelane_b32 v0, s34, 0
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; clobber
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s34, v0, 0
; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    v_writelane_b32 v0, s34, 0
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; clobber
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s34, v0, 0
; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v0, s34, 0
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; clobber
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s34, v0, 0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void asm sideeffect "; clobber", "~{s34}"() #0
  ret void
}

; amdgpu_gfx caller of void_func_void_clobber_s33. The checks expect the caller
; to stash s33 in lane 2 of v40 and s30/s31 in lanes 0/1, copy the incoming s32
; into s33, bump s32 for the call, and undo all of that before returning.
; NOTE(review): the call site below is marked amdgpu_gfx, but the definition of
; void_func_void_clobber_s33 in this file carries no calling-convention
; keyword -- confirm that this mismatch is intentional.
define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX9-LABEL: test_call_void_func_void_clobber_s33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_clobber_s33:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_clobber_s33:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, void_func_void_clobber_s33@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, void_func_void_clobber_s33@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call amdgpu_gfx void @void_func_void_clobber_s33()
  ret void
}

; amdgpu_gfx caller of void_func_void_clobber_s34. The checks expect the same
; caller-side sequence as for the s33 variant: s33 goes to lane 2 of v40,
; s30/s31 go to lanes 0/1, s33 takes the incoming s32, s32 is bumped for the
; call, and everything is restored before returning.
; NOTE(review): the call site below is marked amdgpu_gfx, but the definition of
; void_func_void_clobber_s34 in this file carries no calling-convention
; keyword -- confirm that this mismatch is intentional.
define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX9-LABEL: test_call_void_func_void_clobber_s34:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_clobber_s34:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    v_readlane_b32 s31, v40, 1
; GFX10-NEXT:    v_readlane_b32 s30, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_clobber_s34:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 2
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, void_func_void_clobber_s34@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, void_func_void_clobber_s34@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call amdgpu_gfx void @void_func_void_clobber_s34()
  ret void
}

; A value produced in s40 by inline asm has to stay live across an amdgpu_gfx
; call to external_void_func_void and is consumed afterwards through an "s"
; constraint. The checks expect the value to be copied into s4 before the call
; ("s_mov_b32 s4, s40"), the later asm to read s4, and the previous contents of
; s4 to be saved in lane 0 of v40 and restored before the function returns.
define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX9-LABEL: callee_saved_sgpr_kernel:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s4, s40
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s4
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: callee_saved_sgpr_kernel:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, s40
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s4
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: callee_saved_sgpr_kernel:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, s40
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s4
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  ret void
}

; Combined SGPR + VGPR variant: inline asm defines one value in s40 and one in
; v32, both of which must stay live across an amdgpu_gfx call to
; external_void_func_void and are consumed afterwards through "s" and "v"
; constraints. The checks expect the scalar to be copied into s4 (old s4 saved
; in lane 0 of v40) and the vector value to be copied into v41, with v41
; spilled to / reloaded from scratch at s33 and v40 at s32 offset:4.
define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
; GFX9-NEXT:    v_writelane_b32 v40, s4, 0
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s40
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s4, s40
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def v32
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    v_mov_b32_e32 v41, v32
; GFX9-NEXT:    s_getpc_b64 s[34:35]
; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s4
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use v41
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v40, 2
; GFX9-NEXT:    v_readlane_b32 s30, v40, 1
; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
; GFX9-NEXT:    s_addk_i32 s32, 0xfc00
; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
; GFX9-NEXT:    s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[34:35]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def s40
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_writelane_b32 v40, s4, 0
; GFX10-NEXT:    s_mov_b32 s4, s40
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; def v32
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    v_mov_b32_e32 v41, v32
; GFX10-NEXT:    s_getpc_b64 s[34:35]
; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use s4
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ; use v41
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT:    v_readlane_b32 s31, v40, 2
; GFX10-NEXT:    v_readlane_b32 s30, v40, 1
; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
; GFX10-NEXT:    s_addk_i32 s32, 0xfe00
; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
; GFX10-NEXT:    s_or_saveexec_b32 s34, -1
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s34
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    v_writelane_b32 v40, s33, 3
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def s40
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_writelane_b32 v40, s4, 0
; GFX11-NEXT:    s_mov_b32 s4, s40
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; def v32
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    v_mov_b32_e32 v41, v32
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 1
; GFX11-NEXT:    v_writelane_b32 v40, s31, 2
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use s4
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ; use v41
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    v_readlane_b32 s31, v40, 2
; GFX11-NEXT:    v_readlane_b32 s30, v40, 1
; GFX11-NEXT:    v_readlane_b32 s4, v40, 0
; GFX11-NEXT:    s_add_i32 s32, s32, -16
; GFX11-NEXT:    v_readlane_b32 s33, v40, 3
; GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
  call amdgpu_gfx void @external_void_func_void()
  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
  ret void
}

; Attribute groups referenced above: #0 is plain nounwind; #1 is nounwind plus
; noinline.
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }