Skip to content

Commit 6f91bfc

Browse files
[LLVM][AArch64ISel] Fix IsLE predicate setting so it does not affect BE codegen. (#135978)
Ensure little-endian-specific patterns, not just their multiclasses, are protected by IsLE.
1 parent a09fd9c commit 6f91bfc

File tree

3 files changed

+90
-45
lines changed

3 files changed

+90
-45
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2752,7 +2752,7 @@ let Predicates = [HasSVE_or_SME] in {
27522752

27532753
// For big endian, only BITCASTs involving same sized vector types with same
27542754
// size vector elements can be isel'd directly.
2755-
let Predicates = [IsLE] in
2755+
let Predicates = [HasSVE_or_SME, IsLE] in
27562756
foreach VT = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
27572757
foreach VT2 = [ nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16 ] in
27582758
if !ne(VT,VT2) then
@@ -3002,24 +3002,25 @@ let Predicates = [HasSVE_or_SME] in {
30023002
defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
30033003
defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
30043004

3005-
// Allow using the reg+reg form of ld1b/st1b for memory accesses with the
3006-
// same width as nxv16i8. This saves an add in cases where we would
3007-
// otherwise compute the address separately.
3008-
multiclass unpred_loadstore_bitcast<ValueType Ty> {
3009-
let Predicates = [IsLE] in {
3005+
let Predicates = [HasSVE_or_SME, IsLE] in {
3006+
// Allow using the reg+reg form of ld1b/st1b for memory accesses with the
3007+
// same width as nxv16i8. This saves an add in cases where we would
3008+
// otherwise compute the address separately.
3009+
multiclass unpred_loadstore_bitcast<ValueType Ty> {
30103010
def : Pat<(Ty (load (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset))),
30113011
(LD1B (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
30123012
def : Pat<(store Ty:$val, (am_sve_regreg_lsl0 GPR64sp:$base, GPR64:$offset)),
30133013
(ST1B ZPR:$val, (PTRUE_B 31), GPR64sp:$base, GPR64:$offset)>;
30143014
}
3015+
3016+
defm : unpred_loadstore_bitcast<nxv8i16>;
3017+
defm : unpred_loadstore_bitcast<nxv8f16>;
3018+
defm : unpred_loadstore_bitcast<nxv8bf16>;
3019+
defm : unpred_loadstore_bitcast<nxv4f32>;
3020+
defm : unpred_loadstore_bitcast<nxv4i32>;
3021+
defm : unpred_loadstore_bitcast<nxv2i64>;
3022+
defm : unpred_loadstore_bitcast<nxv2f64>;
30153023
}
3016-
defm : unpred_loadstore_bitcast<nxv8i16>;
3017-
defm : unpred_loadstore_bitcast<nxv8f16>;
3018-
defm : unpred_loadstore_bitcast<nxv8bf16>;
3019-
defm : unpred_loadstore_bitcast<nxv4f32>;
3020-
defm : unpred_loadstore_bitcast<nxv4i32>;
3021-
defm : unpred_loadstore_bitcast<nxv2i64>;
3022-
defm : unpred_loadstore_bitcast<nxv2f64>;
30233024

30243025
// Allow using LDR/STR to avoid the predicate dependence.
30253026
let Predicates = [HasSVE_or_SME, IsLE, AllowMisalignedMemAccesses] in

llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
3+
; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
34

45
; LD1B
56

@@ -15,33 +16,54 @@ define <vscale x 16 x i8> @ld1_nxv16i8(ptr %addr, i64 %off) {
1516
}
1617

1718
define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(ptr %addr, i64 %off) {
18-
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
19-
; CHECK: // %bb.0:
20-
; CHECK-NEXT: ptrue p0.b
21-
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
22-
; CHECK-NEXT: ret
19+
; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i16:
20+
; CHECK-LE: // %bb.0:
21+
; CHECK-LE-NEXT: ptrue p0.b
22+
; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
23+
; CHECK-LE-NEXT: ret
24+
;
25+
; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i16:
26+
; CHECK-BE: // %bb.0:
27+
; CHECK-BE-NEXT: ptrue p0.h
28+
; CHECK-BE-NEXT: add x8, x0, x1
29+
; CHECK-BE-NEXT: ld1h { z0.h }, p0/z, [x8]
30+
; CHECK-BE-NEXT: ret
2331
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
2432
%val = load volatile <vscale x 8 x i16>, ptr %ptr
2533
ret <vscale x 8 x i16> %val
2634
}
2735

2836
define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(ptr %addr, i64 %off) {
29-
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
30-
; CHECK: // %bb.0:
31-
; CHECK-NEXT: ptrue p0.b
32-
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
33-
; CHECK-NEXT: ret
37+
; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i32:
38+
; CHECK-LE: // %bb.0:
39+
; CHECK-LE-NEXT: ptrue p0.b
40+
; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
41+
; CHECK-LE-NEXT: ret
42+
;
43+
; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i32:
44+
; CHECK-BE: // %bb.0:
45+
; CHECK-BE-NEXT: ptrue p0.s
46+
; CHECK-BE-NEXT: add x8, x0, x1
47+
; CHECK-BE-NEXT: ld1w { z0.s }, p0/z, [x8]
48+
; CHECK-BE-NEXT: ret
3449
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
3550
%val = load volatile <vscale x 4 x i32>, ptr %ptr
3651
ret <vscale x 4 x i32> %val
3752
}
3853

3954
define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(ptr %addr, i64 %off) {
40-
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
41-
; CHECK: // %bb.0:
42-
; CHECK-NEXT: ptrue p0.b
43-
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
44-
; CHECK-NEXT: ret
55+
; CHECK-LE-LABEL: ld1_nxv16i8_bitcast_to_i64:
56+
; CHECK-LE: // %bb.0:
57+
; CHECK-LE-NEXT: ptrue p0.b
58+
; CHECK-LE-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
59+
; CHECK-LE-NEXT: ret
60+
;
61+
; CHECK-BE-LABEL: ld1_nxv16i8_bitcast_to_i64:
62+
; CHECK-BE: // %bb.0:
63+
; CHECK-BE-NEXT: ptrue p0.d
64+
; CHECK-BE-NEXT: add x8, x0, x1
65+
; CHECK-BE-NEXT: ld1d { z0.d }, p0/z, [x8]
66+
; CHECK-BE-NEXT: ret
4567
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
4668
%val = load volatile <vscale x 2 x i64>, ptr %ptr
4769
ret <vscale x 2 x i64> %val

llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
3+
; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE
34

45
; ST1B
56

@@ -15,33 +16,54 @@ define void @st1_nxv16i8(ptr %addr, i64 %off, <vscale x 16 x i8> %val) {
1516
}
1617

1718
define void @st1_nxv16i8_bitcast_from_i16(ptr %addr, i64 %off, <vscale x 8 x i16> %val) {
18-
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i16:
19-
; CHECK: // %bb.0:
20-
; CHECK-NEXT: ptrue p0.b
21-
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
22-
; CHECK-NEXT: ret
19+
; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i16:
20+
; CHECK-LE: // %bb.0:
21+
; CHECK-LE-NEXT: ptrue p0.b
22+
; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
23+
; CHECK-LE-NEXT: ret
24+
;
25+
; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i16:
26+
; CHECK-BE: // %bb.0:
27+
; CHECK-BE-NEXT: ptrue p0.h
28+
; CHECK-BE-NEXT: add x8, x0, x1
29+
; CHECK-BE-NEXT: st1h { z0.h }, p0, [x8]
30+
; CHECK-BE-NEXT: ret
2331
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
2432
store <vscale x 8 x i16> %val, ptr %ptr
2533
ret void
2634
}
2735

2836
define void @st1_nxv16i8_bitcast_from_i32(ptr %addr, i64 %off, <vscale x 4 x i32> %val) {
29-
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i32:
30-
; CHECK: // %bb.0:
31-
; CHECK-NEXT: ptrue p0.b
32-
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
33-
; CHECK-NEXT: ret
37+
; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i32:
38+
; CHECK-LE: // %bb.0:
39+
; CHECK-LE-NEXT: ptrue p0.b
40+
; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
41+
; CHECK-LE-NEXT: ret
42+
;
43+
; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i32:
44+
; CHECK-BE: // %bb.0:
45+
; CHECK-BE-NEXT: ptrue p0.s
46+
; CHECK-BE-NEXT: add x8, x0, x1
47+
; CHECK-BE-NEXT: st1w { z0.s }, p0, [x8]
48+
; CHECK-BE-NEXT: ret
3449
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
3550
store <vscale x 4 x i32> %val, ptr %ptr
3651
ret void
3752
}
3853

3954
define void @st1_nxv16i8_bitcast_from_i64(ptr %addr, i64 %off, <vscale x 2 x i64> %val) {
40-
; CHECK-LABEL: st1_nxv16i8_bitcast_from_i64:
41-
; CHECK: // %bb.0:
42-
; CHECK-NEXT: ptrue p0.b
43-
; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
44-
; CHECK-NEXT: ret
55+
; CHECK-LE-LABEL: st1_nxv16i8_bitcast_from_i64:
56+
; CHECK-LE: // %bb.0:
57+
; CHECK-LE-NEXT: ptrue p0.b
58+
; CHECK-LE-NEXT: st1b { z0.b }, p0, [x0, x1]
59+
; CHECK-LE-NEXT: ret
60+
;
61+
; CHECK-BE-LABEL: st1_nxv16i8_bitcast_from_i64:
62+
; CHECK-BE: // %bb.0:
63+
; CHECK-BE-NEXT: ptrue p0.d
64+
; CHECK-BE-NEXT: add x8, x0, x1
65+
; CHECK-BE-NEXT: st1d { z0.d }, p0, [x8]
66+
; CHECK-BE-NEXT: ret
4567
%ptr = getelementptr inbounds i8, ptr %addr, i64 %off
4668
store <vscale x 2 x i64> %val, ptr %ptr
4769
ret void

0 commit comments

Comments
 (0)