# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -debugify-and-strip-all-safe -mtriple aarch64 -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="load_or_combine" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LITTLE # RUN: llc -debugify-and-strip-all-safe -mtriple arm64eb -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="load_or_combine" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=BIG # REQUIRES: asserts # Test that we can combine patterns like # # s8* x = ... # s32 y = (x[0] | (x[1] << 8)) | ((x[2] << 16) | (x[3] << 24)) # # Into either a load, or a load with a bswap. ... --- name: s8_loads_to_s32_little_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s8* x = ... ; s32 y = (x[0] | (x[1] << 8)) | ((x[2] << 16) | (x[3] << 24)) ; ; -> Little endian: Load from x[0] ; -> Big endian: Load from x[0] + BSWAP ; LITTLE-LABEL: name: s8_loads_to_s32_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s8_loads_to_s32_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 %cst_2:_(s32) = G_CONSTANT i32 2 %cst_3:_(s32) = G_CONSTANT i32 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) 
%elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) ; Note the shape of the tree: ; ; byte byte byte byte ; \ / \ / ; OR OR ; \ / ; \ / ; OR %or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 %full_load:_(s32) = G_OR %or1, %or2 $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: s8_loads_to_s32_big_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s8* x = ... ; s32 y = ((x[0] << 24) | (x[1] << 16)) | ((x[2] << 8) | x[3]) ; ; -> Little endian: Load from x[0] + BSWAP ; -> Big endian: Load from x[0] ; LITTLE-LABEL: name: s8_loads_to_s32_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s8_loads_to_s32_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 %cst_2:_(s32) = G_CONSTANT i32 2 %cst_3:_(s32) = G_CONSTANT i32 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %byte0:_(s32) = nuw G_SHL %elt0, %cst_24(s32) %byte1:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %byte2:_(s32) = nuw G_SHL 
%elt2, %cst_8(s32) %byte3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 %full_load:_(s32) = G_OR %or1, %or2 $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: different_or_pattern tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; Slightly different OR tree. ; ; s8* x = ... ; s32 y = (((x[0] | (x[1] << 8)) | (x[2] << 16)) | (x[3] << 24)) ; ; -> Little endian: Load from x[0] ; -> Big endian: Load from x[0] + BSWAP ; LITTLE-LABEL: name: different_or_pattern ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: different_or_pattern ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 %cst_2:_(s32) = G_CONSTANT i32 2 %cst_3:_(s32) = G_CONSTANT i32 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32) ; Note the shape of the tree: ; ; byte byte ; \ / ; OR_1 byte ; \ / ; OR_2 ; \ ; ... 
%or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %or1, %byte2 %full_load:_(s32) = G_OR %or2, %byte3 $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: s16_loads_to_s32_little_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s16* x = ... ; s32 y = x[0] | (x[1] << 16) ; ; -> Little endian: Load from x[0] ; -> Big endian: Load from x[0] + BSWAP ; LITTLE-LABEL: name: s16_loads_to_s32_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s16_loads_to_s32_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_16:_(s32) = G_CONSTANT i32 16 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: s16_loads_to_s32_big_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s16 *x = ... 
; s32 y = x[1] | (x[0] << 16) ; ; -> Little endian: Load from x[0] + BSWAP ; -> Big endian: Load from x[0] ; LITTLE-LABEL: name: s16_loads_to_s32_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: s16_loads_to_s32_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_16:_(s32) = G_CONSTANT i32 16 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt0, %cst_16(s32) %low_half:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: s16_loads_to_s64_little_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s16 *x = ... 
; s64 y = (x[0] | (x[1] << 16)) | ((x[2] << 32) | (x[3] << 48)) ; ; -> Little endian: Load from x[0] ; -> Big endian: Load from x[0] + BSWAP ; LITTLE-LABEL: name: s16_loads_to_s64_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; LITTLE: $x1 = COPY %full_load(s64) ; LITTLE: RET_ReallyLR implicit $x1 ; BIG-LABEL: name: s16_loads_to_s64_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; BIG: %full_load:_(s64) = G_BSWAP [[LOAD]] ; BIG: $x1 = COPY %full_load(s64) ; BIG: RET_ReallyLR implicit $x1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_2:_(s64) = G_CONSTANT i64 2 %cst_3:_(s64) = G_CONSTANT i64 3 %cst_16:_(s64) = G_CONSTANT i64 16 %cst_32:_(s64) = G_CONSTANT i64 32 %cst_48:_(s64) = G_CONSTANT i64 48 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16)) %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_32(s64) %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64) %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7 %full_load:_(s64) = G_OR %or1, %or2 $x1 = COPY %full_load(s64) RET_ReallyLR implicit $x1 ... --- name: s16_loads_to_s64_big_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s16 *x = ... 
; s64 y = (x[3] | (x[2] << 16)) | ((x[1] << 32) | (x[0] << 48)) ; ; -> Little endian: Load from x[0] + BSWAP ; -> Big endian: Load from x[0] ; LITTLE-LABEL: name: s16_loads_to_s64_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; LITTLE: %full_load:_(s64) = G_BSWAP [[LOAD]] ; LITTLE: $x1 = COPY %full_load(s64) ; LITTLE: RET_ReallyLR implicit $x1 ; BIG-LABEL: name: s16_loads_to_s64_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2) ; BIG: $x1 = COPY %full_load(s64) ; BIG: RET_ReallyLR implicit $x1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_2:_(s64) = G_CONSTANT i64 2 %cst_3:_(s64) = G_CONSTANT i64 3 %cst_16:_(s64) = G_CONSTANT i64 16 %cst_32:_(s64) = G_CONSTANT i64 32 %cst_48:_(s64) = G_CONSTANT i64 48 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) %elt0:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16)) %byte0_byte1:_(s64) = nuw G_SHL %elt0, %cst_48(s64) %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_32(s64) %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_16(s64) %byte6_byte7:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7 %full_load:_(s64) = G_OR %or1, %or2 $x1 = COPY %full_load(s64) RET_ReallyLR implicit $x1 ... --- name: nonzero_start_idx_positive_little_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s8* x = ... 
; s32 y = (x[1] | (x[2] << 8)) | ((x[3] << 16) | (x[4] << 24)) ; ; -> Little endian: Load from x[1] ; -> Big endian: Load from x[1] + BSWAP ; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 %cst_2:_(s32) = G_CONSTANT i32 2 %cst_3:_(s32) = G_CONSTANT i32 3 %cst_4:_(s32) = G_CONSTANT i32 4 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %elt4:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8)) %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt2, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt3, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt4, %cst_24(s32) %or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 %full_load:_(s32) = G_OR %or1, %or2 $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... 
--- name: nonzero_start_idx_positive_big_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s8* x = ... ; s32 y = (x[4] | (x[3] << 8)) | ((x[2] << 16) | (x[1] << 24)) ; ; -> Little endian: Load from x[1] + BSWAP ; -> Big endian: Load from x[1] ; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s32) = G_CONSTANT i32 1 %cst_2:_(s32) = G_CONSTANT i32 2 %cst_3:_(s32) = G_CONSTANT i32 3 %cst_4:_(s32) = G_CONSTANT i32 4 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt3, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt1, %cst_24(s32) %or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 %full_load:_(s32) = G_OR %or1, %or2 $w1 = COPY %full_load(s32) RET_ReallyLR implicit 
$w1 ... --- name: nonzero_start_idx_negative_little_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s8* x = ... ; s32 y = (x[-3] | (x[-2] << 8)) | ((x[-1] << 16) | (x[0] << 24)) ; ; -> Little endian: Load from x[-3] ; -> Big endian: Load from x[-3] + BSWAP ; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_neg_1:_(s32) = G_CONSTANT i32 -1 %cst_neg_2:_(s32) = G_CONSTANT i32 -2 %cst_neg_3:_(s32) = G_CONSTANT i32 -3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32) %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32) %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8)) %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8)) %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt_neg_2, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt_neg_1, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt_0, %cst_24(s32) %or1:_(s32) = G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 %full_load:_(s32) = G_OR %or1, %or2 $w1 = 
COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: nonzero_start_idx_negative_big_endian_pat tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; s8* x = ... ; s32 y = (x[0] | (x[-1] << 8)) | ((x[-2] << 16) | (x[-3] << 24)) ; ; -> Little endian: Load from x[-3] + BSWAP ; -> Big endian: Load from x[-3] ; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; LITTLE: %ptr:_(p0) = COPY $x0 ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat ; BIG: liveins: $x0, $x1 ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 ; BIG: %ptr:_(p0) = COPY $x0 ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_neg_1:_(s32) = G_CONSTANT i32 -1 %cst_neg_2:_(s32) = G_CONSTANT i32 -2 %cst_neg_3:_(s32) = G_CONSTANT i32 -3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32) %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32) %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8)) %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8)) %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8)) %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %byte1:_(s32) = nuw G_SHL %elt_neg_1, %cst_8(s32) %byte2:_(s32) = nuw G_SHL %elt_neg_2, %cst_16(s32) %byte3:_(s32) = nuw G_SHL %elt_neg_3, %cst_24(s32) %or1:_(s32) = 
G_OR %byte0, %byte1 %or2:_(s32) = G_OR %byte2, %byte3 %full_load:_(s32) = G_OR %or1, %or2 $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: dont_combine_volatile tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; Combine should only happen with unordered loads. ; LITTLE-LABEL: name: dont_combine_volatile ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_combine_volatile ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_16:_(s32) = G_CONSTANT i32 16 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: dont_wrong_memop_size tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; Combine should only happen when the loads load the same size. 
; LITTLE-LABEL: name: dont_wrong_memop_size ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_wrong_memop_size ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; BIG: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_16:_(s32) = G_CONSTANT i32 16 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: dont_combine_wrong_offset tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; This is not equivalent to a 32-bit load with/without a BSWAP: ; ; s16 *x = ... 
; s32 y = x[0] | (x[1] << 24) ; LITTLE-LABEL: name: dont_combine_wrong_offset ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_combine_wrong_offset ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: dont_combine_wrong_offset_2 tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; This does not correspond to a 32-bit load with/without a BSWAP: ; ; s16 *x = ... 
; s32 y = x[0] | (x[1] << 8) ; LITTLE-LABEL: name: dont_combine_wrong_offset_2 ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_combine_wrong_offset_2 ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32) ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_8:_(s32) = G_CONSTANT i32 8 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32) %full_load:_(s32) = G_OR %low_half, %high_half $w1 = COPY %full_load(s32) RET_ReallyLR implicit $w1 ... --- name: dont_combine_missing_load tracksRegLiveness: true body: | bb.0: liveins: $x0, $x1 ; This is missing x[2], so we shouldn't combine: ; ; s16 *x = ... 
; s64 y = (x[0] | (x[1] << 16)) | (x[3] << 48) ; LITTLE-LABEL: name: dont_combine_missing_load ; LITTLE: liveins: $x0, $x1 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3 ; LITTLE: %cst_16:_(s64) = G_CONSTANT i64 16 ; LITTLE: %cst_48:_(s64) = G_CONSTANT i64 48 ; LITTLE: %ptr:_(p0) = COPY $x1 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; LITTLE: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; LITTLE: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; LITTLE: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) ; LITTLE: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) ; LITTLE: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64) ; LITTLE: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 ; LITTLE: %full_load:_(s64) = G_OR %or1, %byte6_byte7 ; LITTLE: $x1 = COPY %full_load(s64) ; LITTLE: RET_ReallyLR implicit $x1 ; BIG-LABEL: name: dont_combine_missing_load ; BIG: liveins: $x0, $x1 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3 ; BIG: %cst_16:_(s64) = G_CONSTANT i64 16 ; BIG: %cst_48:_(s64) = G_CONSTANT i64 48 ; BIG: %ptr:_(p0) = COPY $x1 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; BIG: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16)) ; BIG: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16)) ; BIG: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16)) ; BIG: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64) ; BIG: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64) ; BIG: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3 ; BIG: %full_load:_(s64) = G_OR %or1, %byte6_byte7 ; BIG: $x1 = COPY %full_load(s64) ; BIG: RET_ReallyLR implicit $x1 %cst_1:_(s64) = G_CONSTANT i64 1 %cst_3:_(s64) = G_CONSTANT i64 3 %cst_16:_(s64) = G_CONSTANT i64 16 %cst_48:_(s64) = G_CONSTANT i64 48 %ptr:_(p0) = COPY $x1 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, 
%cst_1(s64)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
    %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
    %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
    %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
    %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
    %full_load:_(s64) = G_OR %or1, %byte6_byte7
    $x1 = COPY %full_load(s64)
    RET_ReallyLR implicit $x1

...
---
name:            dont_combine_different_addr_spaces
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; When the loads are from different address spaces, don't combine.

    ; LITTLE-LABEL: name: dont_combine_different_addr_spaces
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_different_addr_spaces
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; addrspace 0 here vs. addrspace 1 below is the mismatch under test.
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16), addrspace 0)
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_duplicate_idx
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; If two of the G_PTR_ADDs have the same index, then don't combine.
    ;
    ; sN *x = ...
    ; sM y = (x[i] << A) | (x[i] << B) ...

    ; LITTLE-LABEL: name: dont_combine_duplicate_idx
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
    ; LITTLE: %reused_idx:_(s32) = G_CONSTANT i32 2
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
    ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8))
    ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1
    ; LITTLE: %or2:_(s32) = G_OR %byte2, %byte3
    ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_duplicate_idx
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
    ; BIG: %reused_idx:_(s32) = G_CONSTANT i32 2
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
    ; BIG: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8))
    ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; BIG: %or1:_(s32) = G_OR %byte0, %byte1
    ; BIG: %or2:_(s32) = G_OR %byte2, %byte3
    ; BIG: %full_load:_(s32) = G_OR %or1, %or2
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s32) = G_CONSTANT i32 1
    %reused_idx:_(s32) = G_CONSTANT i32 2
    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; Both of these G_PTR_ADDs use index 2, so no combine.
    %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_duplicate_offset
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; If two of the G_SHLs have the same constant, then we should not combine.
    ;
    ; sN *x = ...
    ; sM y = (x[i] << A) | (x[i+1] << A) ...
; LITTLE-LABEL: name: dont_combine_duplicate_offset
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
    ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
    ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; LITTLE: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32)
    ; LITTLE: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32)
    ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1
    ; LITTLE: %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2
    ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_duplicate_offset
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
    ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
    ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; BIG: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32)
    ; BIG: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32)
    ; BIG: %or1:_(s32) = G_OR %byte0, %byte1
    ; BIG: %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2
    ; BIG: %full_load:_(s32) = G_OR %or1, %or2
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s32) = G_CONSTANT i32 1
    %cst_2:_(s32) = G_CONSTANT i32 2
    %cst_3:_(s32) = G_CONSTANT i32 3
    %cst_8:_(s32) = G_CONSTANT i32 8
    %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; Two shifts by the same amount (16), so no combine.
    %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32)
    %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32)
    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2
    %full_load:_(s32) = G_OR %or1, %or2
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_lowest_index_not_zero_offset
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; In this case, the lowest index load (e.g. x[0]) does not end up at byte
    ; offset 0. We shouldn't combine.
    ;
    ; s8 *x = ...
    ; s32 y = (x[0] << 8) | (x[1]) | (x[2] << 16) ...
; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
    ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
    ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; LITTLE: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32)
    ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1
    ; LITTLE: %or2:_(s32) = G_OR %byte2, %byte3
    ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
    ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
    ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; BIG: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32)
    ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; BIG: %or1:_(s32) = G_OR %byte0, %byte1
    ; BIG: %or2:_(s32) = G_OR %byte2, %byte3
    ; BIG: %full_load:_(s32) = G_OR %or1, %or2
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s32) = G_CONSTANT i32 1
    %cst_2:_(s32) = G_CONSTANT i32 2
    %cst_3:_(s32) = G_CONSTANT i32 3
    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)

    ; This load is index 0
    %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))

    %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))

    ; ... But it ends up being shifted, so we shouldn't combine.
    %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)

    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_more_than_one_use_load
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; If any load is used more than once, don't combine. We want to remove the
    ; entire tree.
; LITTLE-LABEL: name: dont_combine_more_than_one_use_load
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %low_half
    ; LITTLE: $w1 = COPY %extra_use(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_more_than_one_use_load
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: %extra_use:_(s32) = G_AND %full_load, %low_half
    ; BIG: $w1 = COPY %extra_use(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    ; %low_half (a load) is reused here, so the tree can't be deleted.
    %extra_use:_(s32) = G_AND %full_load, %low_half
    $w1 = COPY %extra_use(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_more_than_one_use_shl
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; If anything feeding into any of the ors is used more than once, don't
    ; combine.
; LITTLE-LABEL: name: dont_combine_more_than_one_use_shl
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %high_half
    ; LITTLE: $w1 = COPY %extra_use(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_more_than_one_use_shl
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: %extra_use:_(s32) = G_AND %full_load, %high_half
    ; BIG: $w1 = COPY %extra_use(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    ; %high_half (a shift feeding the or) is reused here, so no combine.
    %extra_use:_(s32) = G_AND %full_load, %high_half
    $w1 = COPY %extra_use(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_store_between_same_mbb
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; If there is a store between any of the loads, then do not combine.
; LITTLE-LABEL: name: dont_combine_store_between_same_mbb
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %other_ptr:_(p0) = COPY $x1
    ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12
    ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_store_between_same_mbb
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %other_ptr:_(p0) = COPY $x1
    ; BIG: %some_val:_(s32) = G_CONSTANT i32 12
    ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

    ; Memory could be modified here, so don't combine!
    %other_ptr:_(p0) = COPY $x1
    %some_val:_(s32) = G_CONSTANT i32 12
    G_STORE %some_val, %other_ptr :: (store (s16))

    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            dont_combine_store_between_different_mbb
tracksRegLiveness: true
body:             |
  ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
  ; LITTLE: bb.0:
  ; LITTLE: successors: %bb.1(0x80000000)
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
  ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
  ; LITTLE: %ptr:_(p0) = COPY $x1
  ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; LITTLE: bb.1:
  ; LITTLE: successors: %bb.2(0x80000000)
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %other_ptr:_(p0) = COPY $x1
  ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12
  ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
  ; LITTLE: bb.2:
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
  ; LITTLE: $w1 = COPY %full_load(s32)
  ; LITTLE: RET_ReallyLR implicit $w1
  ; BIG-LABEL: name: dont_combine_store_between_different_mbb
  ; BIG: bb.0:
  ; BIG: successors: %bb.1(0x80000000)
  ; BIG: liveins: $x0, $x1
  ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
  ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
  ; BIG: %ptr:_(p0) = COPY $x1
  ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; BIG: bb.1:
  ; BIG: successors: %bb.2(0x80000000)
  ; BIG: liveins: $x0, $x1
  ; BIG: %other_ptr:_(p0) = COPY $x1
  ; BIG: %some_val:_(s32) = G_CONSTANT i32 12
  ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
  ; BIG: bb.2:
  ; BIG: liveins: $x0, $x1
  ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
  ; BIG: $w1 = COPY %full_load(s32)
  ; BIG: RET_ReallyLR implicit $w1
  ; There is a store between the two loads, hidden away in a different MBB.
  ; We should not combine here.
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $x0, $x1

    ; If there is a store between any of the loads, then do not combine.

    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

  bb.1:
    liveins: $x0, $x1
    successors: %bb.2(0x80000000)

    ; Memory could be modified here, so don't combine!
    %other_ptr:_(p0) = COPY $x1
    %some_val:_(s32) = G_CONSTANT i32 12
    G_STORE %some_val, %other_ptr :: (store (s16))

  bb.2:
    liveins: $x0, $x1
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            different_mbb
tracksRegLiveness: true
body:             |
  ; LITTLE-LABEL: name: different_mbb
  ; LITTLE: bb.0:
  ; LITTLE: successors: %bb.1(0x80000000)
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
  ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
  ; LITTLE: %ptr:_(p0) = COPY $x1
  ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; LITTLE: bb.1:
  ; LITTLE: liveins: $x0, $x1
  ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
  ; LITTLE: $w1 = COPY %full_load(s32)
  ; LITTLE: RET_ReallyLR implicit $w1
  ; BIG-LABEL: name: different_mbb
  ; BIG: bb.0:
  ; BIG: successors: %bb.1(0x80000000)
  ; BIG: liveins: $x0, $x1
  ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
  ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
  ; BIG: %ptr:_(p0) = COPY $x1
  ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
  ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  ; BIG: bb.1:
  ; BIG: liveins: $x0, $x1
  ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
  ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
  ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
  ; BIG: $w1 = COPY %full_load(s32)
  ; BIG: RET_ReallyLR implicit $w1
  ; It should be possible to combine here, but it's not supported right now.
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $x0, $x1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))

  bb.1:
    liveins: $x0, $x1
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            load_first
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $x0, $x1

    ; Test for a bug fix for predecessor-checking code.

    ; LITTLE-LABEL: name: load_first
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: load_first
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    ; NOTE: the first load deliberately appears before the def of %ptr below.
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1

...
---
name:            store_between_loads_and_or
alignment:       4
tracksRegLiveness: true
liveins:
  - { reg: '$x0' }
  - { reg: '$x1' }
frameInfo:
  maxAlignment:    1
body:             |
  bb.1:
    liveins: $x0, $x1

    ; Check that we build the G_LOAD at the point of the last load, instead of
    ; the place of the G_OR.
; We could have a G_STORE in between, which it may not be safe to move the
    ; load across.
    liveins: $x0, $x1

    ; LITTLE-LABEL: name: store_between_loads_and_or
    ; LITTLE: liveins: $x0, $x1, $x0, $x1
    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
    ; LITTLE: $w0 = COPY [[LOAD]](s32)
    ; LITTLE: RET_ReallyLR implicit $w0
    ; BIG-LABEL: name: store_between_loads_and_or
    ; BIG: liveins: $x0, $x1, $x0, $x1
    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
    ; BIG: $w0 = COPY [[BSWAP]](s32)
    ; BIG: RET_ReallyLR implicit $w0
    %0:_(p0) = COPY $x0
    %1:_(p0) = COPY $x1
    %12:_(s8) = G_CONSTANT i8 1
    %15:_(s32) = G_CONSTANT i32 8
    %19:_(s32) = G_CONSTANT i32 16
    %23:_(s32) = G_CONSTANT i32 24
    %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
    %3:_(s64) = G_CONSTANT i64 1
    %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
    %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
    %6:_(s64) = G_CONSTANT i64 2
    %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
    %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
    %9:_(s64) = G_CONSTANT i64 3
    %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
    %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
    ; The store sits between the last load and the G_ORs below.
    G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
    %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
    %17:_(s32) = G_OR %16:_, %13:_
    %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
    %21:_(s32) = G_OR %17:_, %20:_
    %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
    %25:_(s32) = G_OR %21:_, %24:_
    $w0 = COPY %25:_(s32)
    RET_ReallyLR implicit $w0

...