# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
#
---
# the COPY can be coalesced based on subregister liveness
name: subrange_coalesce_liveout
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: subrange_coalesce_liveout
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
    S_ENDPGM 0
...
---
# early-clobber stops the coalescer from coalescing the COPY
name: subrange_coalesce_early_clobber
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: subrange_coalesce_early_clobber
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
  ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
    S_ENDPGM 0
...
---
# non-conflict lane(sub1) was redefined, coalescable
name: subrange_coalesce_unrelated_sub_redefined
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
    ; %1.sub1 was re-defined
    %1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
    S_ENDPGM 0
...
---
# Another complex example showing the capability of resolving lane conflict
# based on subranges.
name: subrange_coalesce_complex_pattern
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: subrange_coalesce_complex_pattern
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN-NEXT: S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
  ; GCN-NEXT: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
    %1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec
    S_ENDPGM 0
...