Skip to content

Commit 2f752cf

Browse files
authored
[AArch64][GlobalISel] Adopt some Ld* patterns to reduce codegen regressions (#135492)
This is an update of #69607 after #101675 and #105686. The Ld1Lane64Pat, Ld1Lane128Pat, LoadInsertPatterns, and Neon_INS_elt_pattern patterns from SelectionDAG didn't work for GlobalISel on the v8i8 and v16i8 vector types, because vector_insert for v8i8 and v16i8 in SelectionDAG expects an i32 scalar argument type, whereas G_INSERT_VECTOR_ELT expects s8.
1 parent 53fe3df commit 2f752cf

14 files changed

+179
-344
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,7 @@ let RecomputePerFunction = 1 in {
11151115

11161116
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
11171117
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1118+
11181119
// Toggles patterns which aren't beneficial in GlobalISel when we aren't
11191120
// optimizing. This allows us to selectively use patterns without impacting
11201121
// SelectionDAG's behaviour.
@@ -4038,6 +4039,10 @@ multiclass LoadInsertPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType
40384039
ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
40394040
}
40404041

4042+
// Accept i8 scalar argument in GlobalISel.
4043+
defm : LoadInsertPatterns<load, v16i8, v8i8, nxv16i8, i8,
4044+
LDRBui, LDURBi, LDRBroW, LDRBroX,
4045+
ro8, am_indexed8, am_unscaled8, uimm12s1, bsub>;
40414046
defm : LoadInsertPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32,
40424047
LDRBui, LDURBi, LDRBroW, LDRBroX,
40434048
ro8, am_indexed8, am_unscaled8, uimm12s1, bsub>;
@@ -7309,12 +7314,12 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
73097314
(VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
73107315
(i64 imm:$Immd))),
73117316
(INS V128:$src, imm:$Immd,
7312-
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
7317+
(VT128 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub)), imm:$Immn)>;
73137318

73147319
def : Pat<(VT64 (vector_insert V64:$src,
73157320
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
73167321
(i64 imm:$Immd))),
7317-
(EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
7322+
(EXTRACT_SUBREG (INS (VT128 (SUBREG_TO_REG (i64 0), V64:$src, dsub)),
73187323
imm:$Immd, V128:$Rn, imm:$Immn),
73197324
dsub)>;
73207325

@@ -7332,6 +7337,8 @@ defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi1
73327337
defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane, DUPi32, ssub>;
73337338
defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane, DUPi64, dsub>;
73347339

7340+
// Accept i8 scalar argument in GlobalISel.
7341+
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i8, VectorIndexB, INSvi8lane, DUPi8, bsub>;
73357342
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane, DUPi8, bsub>;
73367343
defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane, DUPi16, hsub>;
73377344
defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane, DUPi32, ssub>;
@@ -8809,6 +8816,8 @@ class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
88098816
(STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
88108817
(LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
88118818

8819+
// Accept i8 scalar argument in GlobalISel.
8820+
def : Ld1Lane128Pat<load, VectorIndexB, v16i8, i8, LD1i8>;
88128821
def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
88138822
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
88148823
def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
@@ -8882,6 +8891,8 @@ class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
88828891
VecIndex:$idx, GPR64sp:$Rn),
88838892
dsub)>;
88848893

8894+
// Accept i8 scalar argument in GlobalISel.
8895+
def : Ld1Lane64Pat<load, VectorIndexB, v8i8, i8, LD1i8>;
88858896
def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
88868897
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
88878898
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;

llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -85,24 +85,16 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
8585
; CHECK-GISEL-NEXT: mov x8, sp
8686
; CHECK-GISEL-NEXT: str q0, [sp]
8787
; CHECK-GISEL-NEXT: and x9, x9, #0xf
88-
; CHECK-GISEL-NEXT: mov b2, v0.b[1]
89-
; CHECK-GISEL-NEXT: mov b3, v0.b[2]
9088
; CHECK-GISEL-NEXT: lsl x10, x9, #1
9189
; CHECK-GISEL-NEXT: sub x9, x10, x9
9290
; CHECK-GISEL-NEXT: ldr b1, [x8, x9]
93-
; CHECK-GISEL-NEXT: mov v1.b[0], v1.b[0]
94-
; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0]
95-
; CHECK-GISEL-NEXT: mov b2, v0.b[3]
96-
; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0]
97-
; CHECK-GISEL-NEXT: mov b3, v0.b[4]
98-
; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0]
99-
; CHECK-GISEL-NEXT: mov b2, v0.b[5]
100-
; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0]
101-
; CHECK-GISEL-NEXT: mov b3, v0.b[6]
102-
; CHECK-GISEL-NEXT: mov b0, v0.b[7]
103-
; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0]
104-
; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0]
105-
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0]
91+
; CHECK-GISEL-NEXT: mov v1.b[1], v0.b[1]
92+
; CHECK-GISEL-NEXT: mov v1.b[2], v0.b[2]
93+
; CHECK-GISEL-NEXT: mov v1.b[3], v0.b[3]
94+
; CHECK-GISEL-NEXT: mov v1.b[4], v0.b[4]
95+
; CHECK-GISEL-NEXT: mov v1.b[5], v0.b[5]
96+
; CHECK-GISEL-NEXT: mov v1.b[6], v0.b[6]
97+
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[7]
10698
; CHECK-GISEL-NEXT: fmov d0, d1
10799
; CHECK-GISEL-NEXT: add sp, sp, #16
108100
; CHECK-GISEL-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13326,10 +13326,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
1332613326
;
1332713327
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane:
1332813328
; CHECK-GI: ; %bb.0:
13329-
; CHECK-GI-NEXT: ldr b1, [x0]
13329+
; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
1333013330
; CHECK-GI-NEXT: add x8, x0, x2
1333113331
; CHECK-GI-NEXT: str x8, [x1]
13332-
; CHECK-GI-NEXT: mov.b v0[1], v1[0]
1333313332
; CHECK-GI-NEXT: ret
1333413333
%tmp1 = load i8, ptr %bar
1333513334
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -13373,11 +13372,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
1337313372
;
1337413373
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
1337513374
; CHECK-GI: ; %bb.0:
13376-
; CHECK-GI-NEXT: ldr b1, [x0]
1337713375
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
1337813376
; CHECK-GI-NEXT: add x8, x0, x2
13377+
; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
1337913378
; CHECK-GI-NEXT: str x8, [x1]
13380-
; CHECK-GI-NEXT: mov.b v0[1], v1[0]
1338113379
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
1338213380
; CHECK-GI-NEXT: ret
1338313381
%tmp1 = load i8, ptr %bar
@@ -13891,43 +13889,20 @@ define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half>
1389113889
}
1389213890

1389313891
define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) {
13894-
; CHECK-SD-LABEL: test_ld1lane_build_i8:
13895-
; CHECK-SD: ; %bb.0:
13896-
; CHECK-SD-NEXT: ldr b1, [x0]
13897-
; CHECK-SD-NEXT: ldr x8, [sp]
13898-
; CHECK-SD-NEXT: ld1.b { v1 }[1], [x1]
13899-
; CHECK-SD-NEXT: ld1.b { v1 }[2], [x2]
13900-
; CHECK-SD-NEXT: ld1.b { v1 }[3], [x3]
13901-
; CHECK-SD-NEXT: ld1.b { v1 }[4], [x4]
13902-
; CHECK-SD-NEXT: ld1.b { v1 }[5], [x5]
13903-
; CHECK-SD-NEXT: ld1.b { v1 }[6], [x6]
13904-
; CHECK-SD-NEXT: ld1.b { v1 }[7], [x7]
13905-
; CHECK-SD-NEXT: sub.8b v0, v1, v0
13906-
; CHECK-SD-NEXT: str d0, [x8]
13907-
; CHECK-SD-NEXT: ret
13908-
;
13909-
; CHECK-GI-LABEL: test_ld1lane_build_i8:
13910-
; CHECK-GI: ; %bb.0:
13911-
; CHECK-GI-NEXT: ldr b1, [x0]
13912-
; CHECK-GI-NEXT: ldr b2, [x1]
13913-
; CHECK-GI-NEXT: ldr x8, [sp]
13914-
; CHECK-GI-NEXT: mov.b v1[0], v1[0]
13915-
; CHECK-GI-NEXT: mov.b v1[1], v2[0]
13916-
; CHECK-GI-NEXT: ldr b2, [x2]
13917-
; CHECK-GI-NEXT: mov.b v1[2], v2[0]
13918-
; CHECK-GI-NEXT: ldr b2, [x3]
13919-
; CHECK-GI-NEXT: mov.b v1[3], v2[0]
13920-
; CHECK-GI-NEXT: ldr b2, [x4]
13921-
; CHECK-GI-NEXT: mov.b v1[4], v2[0]
13922-
; CHECK-GI-NEXT: ldr b2, [x5]
13923-
; CHECK-GI-NEXT: mov.b v1[5], v2[0]
13924-
; CHECK-GI-NEXT: ldr b2, [x6]
13925-
; CHECK-GI-NEXT: mov.b v1[6], v2[0]
13926-
; CHECK-GI-NEXT: ldr b2, [x7]
13927-
; CHECK-GI-NEXT: mov.b v1[7], v2[0]
13928-
; CHECK-GI-NEXT: sub.8b v0, v1, v0
13929-
; CHECK-GI-NEXT: str d0, [x8]
13930-
; CHECK-GI-NEXT: ret
13892+
; CHECK-LABEL: test_ld1lane_build_i8:
13893+
; CHECK: ; %bb.0:
13894+
; CHECK-NEXT: ldr b1, [x0]
13895+
; CHECK-NEXT: ldr x8, [sp]
13896+
; CHECK-NEXT: ld1.b { v1 }[1], [x1]
13897+
; CHECK-NEXT: ld1.b { v1 }[2], [x2]
13898+
; CHECK-NEXT: ld1.b { v1 }[3], [x3]
13899+
; CHECK-NEXT: ld1.b { v1 }[4], [x4]
13900+
; CHECK-NEXT: ld1.b { v1 }[5], [x5]
13901+
; CHECK-NEXT: ld1.b { v1 }[6], [x6]
13902+
; CHECK-NEXT: ld1.b { v1 }[7], [x7]
13903+
; CHECK-NEXT: sub.8b v0, v1, v0
13904+
; CHECK-NEXT: str d0, [x8]
13905+
; CHECK-NEXT: ret
1393113906
%ld.a = load i8, ptr %a
1393213907
%ld.b = load i8, ptr %b
1393313908
%ld.c = load i8, ptr %c

llvm/test/CodeGen/AArch64/arm64-ld1.ll

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin
10041004
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
10051005

10061006
define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
1007-
; CHECK-SD-LABEL: ld1_16b:
1008-
; CHECK-SD: // %bb.0:
1009-
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
1010-
; CHECK-SD-NEXT: ret
1011-
;
1012-
; CHECK-GI-LABEL: ld1_16b:
1013-
; CHECK-GI: // %bb.0:
1014-
; CHECK-GI-NEXT: ldr b1, [x0]
1015-
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
1016-
; CHECK-GI-NEXT: ret
1007+
; CHECK-LABEL: ld1_16b:
1008+
; CHECK: // %bb.0:
1009+
; CHECK-NEXT: ld1.b { v0 }[0], [x0]
1010+
; CHECK-NEXT: ret
10171011
; Make sure we are using the operands defined by the ABI
10181012
%tmp1 = load i8, ptr %bar
10191013
%tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
@@ -1086,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) {
10861080
}
10871081

10881082
define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
1089-
; CHECK-SD-LABEL: ld1_8b:
1090-
; CHECK-SD: // %bb.0:
1091-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1092-
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
1093-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1094-
; CHECK-SD-NEXT: ret
1095-
;
1096-
; CHECK-GI-LABEL: ld1_8b:
1097-
; CHECK-GI: // %bb.0:
1098-
; CHECK-GI-NEXT: ldr b1, [x0]
1099-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1100-
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
1101-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1102-
; CHECK-GI-NEXT: ret
1083+
; CHECK-LABEL: ld1_8b:
1084+
; CHECK: // %bb.0:
1085+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1086+
; CHECK-NEXT: ld1.b { v0 }[0], [x0]
1087+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1088+
; CHECK-NEXT: ret
11031089
; Make sure we are using the operands defined by the ABI
11041090
%tmp1 = load i8, ptr %bar
11051091
%tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0

0 commit comments

Comments
 (0)