Skip to content

[LLVM][AArch64ISel] Fix IsLE predicate setting so it does not affect BE codegen. #135978

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 14 additions & 13 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2729,7 +2729,7 @@ let Predicates = [HasSVE_or_SME] in {

// For big endian, only BITCASTs involving same sized vector types with same
// size vector elements can be isel'd directly.
let Predicates = [IsLE] in
let Predicates = [HasSVE_or_SME, IsLE] in
foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
if !ne(VT,VT2) then
Expand Down Expand Up @@ -2979,24 +2979,25 @@ let Predicates = [HasSVE_or_SME] in {
defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;

// Allow using the reg+reg form of ld1b/st1b for memory accesses with the
// same width as nxv16i8. This saves an add in cases where we would
// otherwise compute the address separately.
multiclass unpred_loadstore_bitcast<ValueType Ty> {
let Predicates = [IsLE] in {
let Predicates = [HasSVE_or_SME, IsLE] in {
// Allow using the reg+reg form of ld1b/st1b for memory accesses with the
// same width as nxv16i8. This saves an add in cases where we would
// otherwise compute the address separately.
multiclass unpred_loadstore_bitcast<ValueType Ty> {
def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
(LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
def : Pat<(store Ty:$val, (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
(ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
}

defm : unpred_loadstore_bitcast<nxv8i16>;
defm : unpred_loadstore_bitcast<nxv8f16>;
defm : unpred_loadstore_bitcast<nxv8bf16>;
defm : unpred_loadstore_bitcast<nxv4f32>;
defm : unpred_loadstore_bitcast<nxv4i32>;
defm : unpred_loadstore_bitcast<nxv2i64>;
defm : unpred_loadstore_bitcast<nxv2f64>;
}
defm : unpred_loadstore_bitcast<nxv8i16>;
defm : unpred_loadstore_bitcast<nxv8f16>;
defm : unpred_loadstore_bitcast<nxv8bf16>;
defm : unpred_loadstore_bitcast<nxv4f32>;
defm : unpred_loadstore_bitcast<nxv4i32>;
defm : unpred_loadstore_bitcast<nxv2i64>;
defm : unpred_loadstore_bitcast<nxv2f64>;

// Allow using LDR/STR to avoid the predicate dependence.
let Predicates = [HasSVE_or_SME, IsLE, AllowMisalignedMemAccesses] in
Expand Down
54 changes: 38 additions & 16 deletions llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE

; LD1B

Expand All @@ -15,33 +16,54 @@ define <vscale x 16 x i8> @ld1_nxv16i8(ptr %addr, i64 %off) {
}

define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i16:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ptrue p0.b
; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i16:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ptrue p0.h
; CHECK-BE-NEXT: add x8, x0, x1
; CHECK-BE-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
%val = load volatile <vscale x 8 x i16>, ptr %ptr
ret <vscale x 8 x i16> %val
}

define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i32:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ptrue p0.b
; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i32:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ptrue p0.s
; CHECK-BE-NEXT: add x8, x0, x1
; CHECK-BE-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
%val = load volatile <vscale x 4 x i32>, ptr %ptr
ret <vscale x 4 x i32> %val
}

define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(ptr %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i64:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ptrue p0.b
; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i64:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ptrue p0.d
; CHECK-BE-NEXT: add x8, x0, x1
; CHECK-BE-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
%val = load volatile <vscale x 2 x i64>, ptr %ptr
ret <vscale x 2 x i64> %val
Expand Down
54 changes: 38 additions & 16 deletions llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE

; ST1B

Expand All @@ -15,33 +16,54 @@ define void @st1_nxv16i8(ptr %addr, i64 %off, <vscale x 16 x i8> %val) {
}

define void @st1_nxv16i8_bitcast_from_i16(ptr %addr, i64 %off, <vscale x 8 x i16> %val) {
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i16:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ptrue p0.b
; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i16:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ptrue p0.h
; CHECK-BE-NEXT: add x8, x0, x1
; CHECK-BE-NEXT: st1h { z0.h }, p0, [x8]
; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
store <vscale x 8 x i16> %val, ptr %ptr
ret void
}

define void @st1_nxv16i8_bitcast_from_i32(ptr %addr, i64 %off, <vscale x 4 x i32> %val) {
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i32:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ptrue p0.b
; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i32:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ptrue p0.s
; CHECK-BE-NEXT: add x8, x0, x1
; CHECK-BE-NEXT: st1w { z0.s }, p0, [x8]
; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
store <vscale x 4 x i32> %val, ptr %ptr
ret void
}

define void @st1_nxv16i8_bitcast_from_i64(ptr %addr, i64 %off, <vscale x 2 x i64> %val) {
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i64:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: ptrue p0.b
; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i64:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ptrue p0.d
; CHECK-BE-NEXT: add x8, x0, x1
; CHECK-BE-NEXT: st1d { z0.d }, p0, [x8]
; CHECK-BE-NEXT: ret
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
store <vscale x 2 x i64> %val, ptr %ptr
ret void
Expand Down
Loading