Skip to content

[LLVM][AArch64ISel] Fix IsLE predicate setting so it does not affect BE codegen. #135978

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into llvm:main
Apr 17, 2025

Conversation

paulwalker-arm
Copy link
Collaborator

Ensure little endian specific patterns, not just their multi-classes, are protected by IsLE.

Whilst unlikely to hit us (and I think practically untestable), I've also updated the predicate setting to include the predicates they are "nested" inside. This brings them in line with how the other predicates are specified.

@llvmbot
Copy link
Member

llvmbot commented Apr 16, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

Ensure little endian specific patterns, not just their multi-classes, are protected by IsLE.

Whilst unlikely to hit us (and I think practically untestable), I've also updated the predicate setting to include the predicates they are "nested" inside. This brings them in line with how the other predicates are specified.


Full diff: https://github.com/llvm/llvm-project/pull/135978.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+14-13)
  • (modified) llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll (+38-16)
  • (modified) llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll (+38-16)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a2f326c994c2f..bb0066ce69fce 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2729,7 +2729,7 @@ let Predicates = [HasSVE_or_SME] in {
 
   // For big endian, only BITCASTs involving same sized vector types with same
   // size vector elements can be isel'd directly.
-  let Predicates = [IsLE] in
+  let Predicates = [HasSVE_or_SME, IsLE] in
     foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
       foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
         if !ne(VT,VT2) then
@@ -2979,24 +2979,25 @@ let Predicates = [HasSVE_or_SME] in {
   defm : unpred_load<        load, nxv2f32,  LD1W_D,  LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
   defm : unpred_load<        load, nxv2f64,    LD1D,    LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
 
-  // Allow using the reg+reg form of ld1b/st1b for memory accesses with the
-  // same width as nxv16i8.  This saves an add in cases where we would
-  // otherwise compute the address separately.
-  multiclass unpred_loadstore_bitcast<ValueType Ty> {
-    let Predicates = [IsLE] in {
+  let Predicates = [HasSVE_or_SME, IsLE] in {
+    // Allow using the reg+reg form of ld1b/st1b for memory accesses with the
+    // same width as nxv16i8.  This saves an add in cases where we would
+    // otherwise compute the address separately.
+    multiclass unpred_loadstore_bitcast<ValueType Ty> {
       def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
                 (LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
       def : Pat<(store Ty:$val, (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
                 (ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
     }
+
+    defm : unpred_loadstore_bitcast<nxv8i16>;
+    defm : unpred_loadstore_bitcast<nxv8f16>;
+    defm : unpred_loadstore_bitcast<nxv8bf16>;
+    defm : unpred_loadstore_bitcast<nxv4f32>;
+    defm : unpred_loadstore_bitcast<nxv4i32>;
+    defm : unpred_loadstore_bitcast<nxv2i64>;
+    defm : unpred_loadstore_bitcast<nxv2f64>;
   }
-  defm : unpred_loadstore_bitcast<nxv8i16>;
-  defm : unpred_loadstore_bitcast<nxv8f16>;
-  defm : unpred_loadstore_bitcast<nxv8bf16>;
-  defm : unpred_loadstore_bitcast<nxv4f32>;
-  defm : unpred_loadstore_bitcast<nxv4i32>;
-  defm : unpred_loadstore_bitcast<nxv2i64>;
-  defm : unpred_loadstore_bitcast<nxv2f64>;
 
   // Allow using LDR/STR to avoid the predicate dependence.
   let Predicates = [HasSVE_or_SME, IsLE, AllowMisalignedMemAccesses] in
diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
index 3f31917b125b7..05abfa319d389 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
 ; LD1B
 
@@ -15,33 +16,54 @@ define <vscale x 16 x i8> @ld1_nxv16i8(ptr %addr, i64 %off) {
 }
 
 define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(ptr %addr, i64 %off) {
-; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i16:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    ptrue p0.b
+; CHECK-LE-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i16:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ptrue p0.h
+; CHECK-BE-NEXT:    add x8, x0, x1
+; CHECK-BE-NEXT:    ld1h { z0.h }, p0/z, [x8]
+; CHECK-BE-NEXT:    ret
   %ptr = getelementptr inbounds i8, ptr %addr, i64 %off
   %val = load volatile <vscale x 8 x i16>, ptr %ptr
   ret <vscale x 8 x i16> %val
 }
 
 define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(ptr %addr, i64 %off) {
-; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i32:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    ptrue p0.b
+; CHECK-LE-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ptrue p0.s
+; CHECK-BE-NEXT:    add x8, x0, x1
+; CHECK-BE-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-BE-NEXT:    ret
   %ptr = getelementptr inbounds i8, ptr %addr, i64 %off
   %val = load volatile <vscale x 4 x i32>, ptr %ptr
   ret <vscale x 4 x i32> %val
 }
 
 define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(ptr %addr, i64 %off) {
-; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    ptrue p0.b
+; CHECK-LE-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ptrue p0.d
+; CHECK-BE-NEXT:    add x8, x0, x1
+; CHECK-BE-NEXT:    ld1d { z0.d }, p0/z, [x8]
+; CHECK-BE-NEXT:    ret
   %ptr = getelementptr inbounds i8, ptr %addr, i64 %off
   %val = load volatile <vscale x 2 x i64>, ptr %ptr
   ret <vscale x 2 x i64> %val
diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
index d859bbb567ebb..0bf6b12a5d020 100644
--- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
+++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
 ; ST1B
 
@@ -15,33 +16,54 @@ define void @st1_nxv16i8(ptr %addr, i64 %off, <vscale x 16 x i8> %val) {
 }
 
 define void @st1_nxv16i8_bitcast_from_i16(ptr %addr, i64 %off, <vscale x 8 x i16> %val) {
-; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i16:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    ptrue p0.b
+; CHECK-LE-NEXT:    st1b { z0.b }, p0, [x0, x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i16:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ptrue p0.h
+; CHECK-BE-NEXT:    add x8, x0, x1
+; CHECK-BE-NEXT:    st1h { z0.h }, p0, [x8]
+; CHECK-BE-NEXT:    ret
   %ptr = getelementptr inbounds i8, ptr %addr, i64 %off
   store <vscale x 8 x i16> %val, ptr %ptr
   ret void
 }
 
 define void @st1_nxv16i8_bitcast_from_i32(ptr %addr, i64 %off, <vscale x 4 x i32> %val) {
-; CHECK-LABEL: st1_nxv16i8_bitcast_from_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i32:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    ptrue p0.b
+; CHECK-LE-NEXT:    st1b { z0.b }, p0, [x0, x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i32:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ptrue p0.s
+; CHECK-BE-NEXT:    add x8, x0, x1
+; CHECK-BE-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-BE-NEXT:    ret
   %ptr = getelementptr inbounds i8, ptr %addr, i64 %off
   store <vscale x 4 x i32> %val, ptr %ptr
   ret void
 }
 
 define void @st1_nxv16i8_bitcast_from_i64(ptr %addr, i64 %off, <vscale x 2 x i64> %val) {
-; CHECK-LABEL: st1_nxv16i8_bitcast_from_i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x1]
-; CHECK-NEXT:    ret
+; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i64:
+; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    ptrue p0.b
+; CHECK-LE-NEXT:    st1b { z0.b }, p0, [x0, x1]
+; CHECK-LE-NEXT:    ret
+;
+; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i64:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    ptrue p0.d
+; CHECK-BE-NEXT:    add x8, x0, x1
+; CHECK-BE-NEXT:    st1d { z0.d }, p0, [x8]
+; CHECK-BE-NEXT:    ret
   %ptr = getelementptr inbounds i8, ptr %addr, i64 %off
   store <vscale x 2 x i64> %val, ptr %ptr
   ret void

Copy link
Contributor

@rj-jesus rj-jesus left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing this, looks good to me.

@paulwalker-arm paulwalker-arm merged commit 6f91bfc into llvm:main Apr 17, 2025
13 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants