# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -global-isel-abort=1 -verify-machineinstrs %s -o - | FileCheck %s
#
# Check folding a G_SHL, G_ASHR, or G_LSHR into a G_BRCOND which has been
# matched as a TB(N)Z.
...
---
name:            fold_shl
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_shl
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64all = COPY $x0
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 2, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; tbnz (shl x, 1), 3 == tbnz x, 2
    %fold_cst:gpr(s64) = G_CONSTANT i64 1
    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
name:            dont_fold_shl_1
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_shl_1
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   %fold_me:gpr64 = UBFMXri %copy, 59, 58
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; 5 > 3, so we cannot do the transformation as above.
    %fold_cst:gpr(s64) = G_CONSTANT i64 5
    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
name:            dont_fold_shl_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_shl_2
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   %fold_cst:gpr64 = MOVi64imm -5
  ; CHECK-NEXT:   %fold_me:gpr64 = LSLVXr %copy, %fold_cst
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %fold_me.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; Same case as above, except the shift amount wraps around.
    %fold_cst:gpr(s64) = G_CONSTANT i64 -5
    %fold_me:gpr(s64) = G_SHL %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
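# The G_SHL cases above exercise the fold "tbnz (shl x, c), b == tbnz x, b - c":
# with b = 3 (the G_AND mask of 8) and c = 1, fold_shl tests bit 2 of the
# unshifted copy. When b - c would be negative (dont_fold_shl_1, c = 5) or the
# shift amount wraps (dont_fold_shl_2, c = -5), the shift is kept and selected
# on its own (UBFMXri / LSLVXr) before the TB(N)Z.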
---
name:            dont_fold_shl_3
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_shl_3
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   %shl:gpr64 = UBFMXri %copy, 62, 61
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %shl.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   %second_use:gpr64sp = ORRXri %shl, 8000
  ; CHECK-NEXT:   $x0 = COPY %second_use
  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0
    %fold_cst:gpr(s64) = G_CONSTANT i64 2

    ; Don't walk past the G_SHL when it's used more than once.
    %shl:gpr(s64) = G_SHL %copy, %fold_cst
    %and:gpr(s64) = G_AND %shl, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0

  bb.1:
    %second_use:gpr(s64) = G_OR %shl, %bit
    $x0 = COPY %second_use
    RET_ReallyLR implicit $x0
...
---
name:            fold_ashr_in_range
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_ashr_in_range
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64all = COPY $x0
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
  ; CHECK-NEXT:   TBNZW [[COPY1]], 4, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; tb(n)z (ashr x, c), b == tb(n)z x, (b + c) when (b + c) is less than the
    ; size of the type.
    ; In this case, we should get 1 + 3 = 4 as the bit number.
    %fold_cst:gpr(s64) = G_CONSTANT i64 1
    %fold_me:gpr(s64) = G_ASHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
name:            fold_ashr_msb_1
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_ashr_msb_1
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   TBNZW %copy, 31, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; We should get a TBNZW with 31 as the bit.
    %fold_cst:gpr(s32) = G_CONSTANT i32 1234
    %fold_me:gpr(s32) = G_ASHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
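# The cases above cover the remaining G_SHL restriction and the first of the
# right-shift folds: a G_SHL with more than one use is left alone and selected
# as UBFMXri (dont_fold_shl_3); for right shifts the test bit moves from b to
# b + c, so fold_ashr_in_range tests bit 3 + 1 = 4; and when the G_ASHR shift
# amount is at least the bit width (fold_ashr_msb_1, c = 1234), the test bit is
# clamped to the MSB, bit 31 for an s32.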
---
name:            fold_ashr_msb_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_ashr_msb_2
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   TBNZX %copy, 63, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; We should get a TBNZX with 63 as the bit.
    %fold_cst:gpr(s64) = G_CONSTANT i64 1234
    %fold_me:gpr(s64) = G_ASHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
name:            fold_lshr
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_lshr
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   TBNZW %copy, 4, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; We should get 4 as the test bit.
    %fold_cst:gpr(s32) = G_CONSTANT i32 1
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
name:            fold_lshr_2
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: fold_lshr_2
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr64 = COPY $x0
  ; CHECK-NEXT:   TBNZX %copy, 32, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s64) = COPY $x0
    %bit:gpr(s64) = G_CONSTANT i64 8
    %zero:gpr(s64) = G_CONSTANT i64 0

    ; We're testing an s64.
    ; 3 + 29 = 32, which is less than 63, so we can fold.
    %fold_cst:gpr(s64) = G_CONSTANT i64 29
    %fold_me:gpr(s64) = G_LSHR %copy, %fold_cst

    %and:gpr(s64) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s64), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
---
name:            dont_fold_lshr
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: dont_fold_lshr
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   %fold_cst:gpr32 = MOVi32imm 29
  ; CHECK-NEXT:   %fold_me:gpr32 = LSRVWr %copy, %fold_cst
  ; CHECK-NEXT:   TBNZW %fold_me, 3, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; We're testing an s32.
    ; 3 + 29 = 32, which is greater than 31, so we don't fold.
    %fold_cst:gpr(s32) = G_CONSTANT i32 29
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
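# The G_LSHR cases above use the same b + c rule, and whether it applies
# depends on the width being tested: 3 + 1 = 4 fits in an s32 (fold_lshr) and
# 3 + 29 = 32 fits in an s64 (fold_lshr_2), but 32 is out of range for an s32,
# so dont_fold_lshr keeps the LSRVWr and tests bit 3 of its result. The s64
# analogue of the G_ASHR MSB clamp tests bit 63 (fold_ashr_msb_2).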
---
name:            lshr_negative
alignment:       4
legalized:       true
regBankSelected: true
body:             |
  ; CHECK-LABEL: name: lshr_negative
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %copy:gpr32 = COPY $w0
  ; CHECK-NEXT:   TBNZW %copy, 2, %bb.1
  ; CHECK-NEXT:   B %bb.0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   RET_ReallyLR
  bb.0:
    successors: %bb.0, %bb.1
    liveins: $x0

    %copy:gpr(s32) = COPY $w0
    %bit:gpr(s32) = G_CONSTANT i32 8
    %zero:gpr(s32) = G_CONSTANT i32 0

    ; The constant becomes very large and wraps around. Since it's larger than
    ; the bit width, the G_LSHR is poison, so we can still fold.
    %fold_cst:gpr(s32) = G_CONSTANT i32 -1
    %fold_me:gpr(s32) = G_LSHR %copy, %fold_cst

    %and:gpr(s32) = G_AND %fold_me, %bit
    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero
    G_BRCOND %cmp, %bb.1
    G_BR %bb.0
  bb.1:
    RET_ReallyLR
...
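# Because the G_LSHR above is poison, the selector is still allowed to fold;
# the checked output tests bit 2 of the unshifted copy, consistent with the
# b + c rule applied to the wrapped constant (3 + (-1) = 2).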