Skip to content

Commit bfdf30e

Browse files
committed
[AArch64] Add patterns for addv(sext) and addv(zext)
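This adds instruction-selection patterns that fold an addv of a sign- or zero-extended vector into a single saddlv/uaddlv (vaddlv), covering the v8i8->i16 and v4i16->i32 cases for both signed and unsigned extends.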
1 parent: 4664a4c; commit: bfdf30e

File tree

4 files changed

+56
-95
lines changed

4 files changed

+56
-95
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7357,6 +7357,19 @@ defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
73577357
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
73587358
defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
73597359

7360+
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (sext (v8i8 V64:$op))))), (i64 0))),
7361+
(EXTRACT_SUBREG (v8i16 (SUBREG_TO_REG (i64 0), (SADDLVv8i8v V64:$op), hsub)), ssub)>;
7362+
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (zext (v8i8 V64:$op))))), (i64 0))),
7363+
(EXTRACT_SUBREG (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub)), ssub)>;
7364+
def : Pat<(v8i16 (AArch64uaddv (v8i16 (sext (v8i8 V64:$op))))),
7365+
(v8i16 (SUBREG_TO_REG (i64 0), (SADDLVv8i8v V64:$op), hsub))>;
7366+
def : Pat<(v8i16 (AArch64uaddv (v8i16 (zext (v8i8 V64:$op))))),
7367+
(v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;
7368+
def : Pat<(v4i32 (AArch64uaddv (v4i32 (sext (v4i16 V64:$op))))),
7369+
(v4i32 (SUBREG_TO_REG (i64 0), (SADDLVv4i16v V64:$op), ssub))>;
7370+
def : Pat<(v4i32 (AArch64uaddv (v4i32 (zext (v4i16 V64:$op))))),
7371+
(v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv4i16v V64:$op), ssub))>;
7372+
73607373
multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
73617374
// Patterns for addv(addlp(x)) ==> addlv
73627375
def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
@@ -7370,7 +7383,7 @@ multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp>
73707383
def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
73717384
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
73727385

7373-
// Patterns for addp(addlp(x))) ==> addlv
7386+
// Patterns for addp(addlp(x)) ==> addlv
73747387
def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
73757388
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
73767389
def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,8 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
443443
define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
444444
; CHECK-SD-LABEL: uabdl4s_rdx_i32:
445445
; CHECK-SD: // %bb.0:
446-
; CHECK-SD-NEXT: uabdl.4s v0, v0, v1
447-
; CHECK-SD-NEXT: addv.4s s0, v0
446+
; CHECK-SD-NEXT: uabd.4h v0, v0, v1
447+
; CHECK-SD-NEXT: uaddlv.4h s0, v0
448448
; CHECK-SD-NEXT: fmov w0, s0
449449
; CHECK-SD-NEXT: ret
450450
;

llvm/test/CodeGen/AArch64/neon-dotreduce.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,7 @@ define i32 @test_udot_v4i8_nomla(ptr nocapture readonly %a1) {
8787
; CHECK-SD: // %bb.0: // %entry
8888
; CHECK-SD-NEXT: ldr s0, [x0]
8989
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
90-
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
91-
; CHECK-SD-NEXT: addv s0, v0.4s
90+
; CHECK-SD-NEXT: uaddlv s0, v0.4h
9291
; CHECK-SD-NEXT: fmov w0, s0
9392
; CHECK-SD-NEXT: ret
9493
;

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 39 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -141,37 +141,23 @@ entry:
141141
}
142142

143143
define i32 @add_v4i16_v4i32_zext(<4 x i16> %x) {
144-
; CHECK-SD-LABEL: add_v4i16_v4i32_zext:
145-
; CHECK-SD: // %bb.0: // %entry
146-
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
147-
; CHECK-SD-NEXT: addv s0, v0.4s
148-
; CHECK-SD-NEXT: fmov w0, s0
149-
; CHECK-SD-NEXT: ret
150-
;
151-
; CHECK-GI-LABEL: add_v4i16_v4i32_zext:
152-
; CHECK-GI: // %bb.0: // %entry
153-
; CHECK-GI-NEXT: uaddlv s0, v0.4h
154-
; CHECK-GI-NEXT: fmov w0, s0
155-
; CHECK-GI-NEXT: ret
144+
; CHECK-LABEL: add_v4i16_v4i32_zext:
145+
; CHECK: // %bb.0: // %entry
146+
; CHECK-NEXT: uaddlv s0, v0.4h
147+
; CHECK-NEXT: fmov w0, s0
148+
; CHECK-NEXT: ret
156149
entry:
157150
%xx = zext <4 x i16> %x to <4 x i32>
158151
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
159152
ret i32 %z
160153
}
161154

162155
define i32 @add_v4i16_v4i32_sext(<4 x i16> %x) {
163-
; CHECK-SD-LABEL: add_v4i16_v4i32_sext:
164-
; CHECK-SD: // %bb.0: // %entry
165-
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
166-
; CHECK-SD-NEXT: addv s0, v0.4s
167-
; CHECK-SD-NEXT: fmov w0, s0
168-
; CHECK-SD-NEXT: ret
169-
;
170-
; CHECK-GI-LABEL: add_v4i16_v4i32_sext:
171-
; CHECK-GI: // %bb.0: // %entry
172-
; CHECK-GI-NEXT: saddlv s0, v0.4h
173-
; CHECK-GI-NEXT: fmov w0, s0
174-
; CHECK-GI-NEXT: ret
156+
; CHECK-LABEL: add_v4i16_v4i32_sext:
157+
; CHECK: // %bb.0: // %entry
158+
; CHECK-NEXT: saddlv s0, v0.4h
159+
; CHECK-NEXT: fmov w0, s0
160+
; CHECK-NEXT: ret
175161
entry:
176162
%xx = sext <4 x i16> %x to <4 x i32>
177163
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -483,8 +469,7 @@ define i32 @add_v4i8_v4i32_zext(<4 x i8> %x) {
483469
; CHECK-SD-LABEL: add_v4i8_v4i32_zext:
484470
; CHECK-SD: // %bb.0: // %entry
485471
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
486-
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
487-
; CHECK-SD-NEXT: addv s0, v0.4s
472+
; CHECK-SD-NEXT: uaddlv s0, v0.4h
488473
; CHECK-SD-NEXT: fmov w0, s0
489474
; CHECK-SD-NEXT: ret
490475
;
@@ -589,8 +574,7 @@ entry:
589574
define signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) {
590575
; CHECK-SD-LABEL: add_v8i8_v8i16_sext:
591576
; CHECK-SD: // %bb.0: // %entry
592-
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
593-
; CHECK-SD-NEXT: addv h0, v0.8h
577+
; CHECK-SD-NEXT: saddlv h0, v0.8b
594578
; CHECK-SD-NEXT: smov w0, v0.h[0]
595579
; CHECK-SD-NEXT: ret
596580
;
@@ -939,20 +923,12 @@ entry:
939923
}
940924

941925
define i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, i32 %a) {
942-
; CHECK-SD-LABEL: add_v4i16_v4i32_acc_zext:
943-
; CHECK-SD: // %bb.0: // %entry
944-
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
945-
; CHECK-SD-NEXT: addv s0, v0.4s
946-
; CHECK-SD-NEXT: fmov w8, s0
947-
; CHECK-SD-NEXT: add w0, w8, w0
948-
; CHECK-SD-NEXT: ret
949-
;
950-
; CHECK-GI-LABEL: add_v4i16_v4i32_acc_zext:
951-
; CHECK-GI: // %bb.0: // %entry
952-
; CHECK-GI-NEXT: uaddlv s0, v0.4h
953-
; CHECK-GI-NEXT: fmov w8, s0
954-
; CHECK-GI-NEXT: add w0, w8, w0
955-
; CHECK-GI-NEXT: ret
926+
; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
927+
; CHECK: // %bb.0: // %entry
928+
; CHECK-NEXT: uaddlv s0, v0.4h
929+
; CHECK-NEXT: fmov w8, s0
930+
; CHECK-NEXT: add w0, w8, w0
931+
; CHECK-NEXT: ret
956932
entry:
957933
%xx = zext <4 x i16> %x to <4 x i32>
958934
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -961,20 +937,12 @@ entry:
961937
}
962938

963939
define i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, i32 %a) {
964-
; CHECK-SD-LABEL: add_v4i16_v4i32_acc_sext:
965-
; CHECK-SD: // %bb.0: // %entry
966-
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
967-
; CHECK-SD-NEXT: addv s0, v0.4s
968-
; CHECK-SD-NEXT: fmov w8, s0
969-
; CHECK-SD-NEXT: add w0, w8, w0
970-
; CHECK-SD-NEXT: ret
971-
;
972-
; CHECK-GI-LABEL: add_v4i16_v4i32_acc_sext:
973-
; CHECK-GI: // %bb.0: // %entry
974-
; CHECK-GI-NEXT: saddlv s0, v0.4h
975-
; CHECK-GI-NEXT: fmov w8, s0
976-
; CHECK-GI-NEXT: add w0, w8, w0
977-
; CHECK-GI-NEXT: ret
940+
; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
941+
; CHECK: // %bb.0: // %entry
942+
; CHECK-NEXT: saddlv s0, v0.4h
943+
; CHECK-NEXT: fmov w8, s0
944+
; CHECK-NEXT: add w0, w8, w0
945+
; CHECK-NEXT: ret
978946
entry:
979947
%xx = sext <4 x i16> %x to <4 x i32>
980948
%z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
@@ -1324,8 +1292,7 @@ define i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) {
13241292
; CHECK-SD-LABEL: add_v4i8_v4i32_acc_zext:
13251293
; CHECK-SD: // %bb.0: // %entry
13261294
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
1327-
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
1328-
; CHECK-SD-NEXT: addv s0, v0.4s
1295+
; CHECK-SD-NEXT: uaddlv s0, v0.4h
13291296
; CHECK-SD-NEXT: fmov w8, s0
13301297
; CHECK-SD-NEXT: add w0, w8, w0
13311298
; CHECK-SD-NEXT: ret
@@ -1402,22 +1369,13 @@ entry:
14021369
}
14031370

14041371
define zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) {
1405-
; CHECK-SD-LABEL: add_v8i8_v8i16_acc_zext:
1406-
; CHECK-SD: // %bb.0: // %entry
1407-
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
1408-
; CHECK-SD-NEXT: addv h0, v0.8h
1409-
; CHECK-SD-NEXT: fmov w8, s0
1410-
; CHECK-SD-NEXT: add w8, w8, w0
1411-
; CHECK-SD-NEXT: and w0, w8, #0xffff
1412-
; CHECK-SD-NEXT: ret
1413-
;
1414-
; CHECK-GI-LABEL: add_v8i8_v8i16_acc_zext:
1415-
; CHECK-GI: // %bb.0: // %entry
1416-
; CHECK-GI-NEXT: uaddlv h0, v0.8b
1417-
; CHECK-GI-NEXT: fmov w8, s0
1418-
; CHECK-GI-NEXT: add w8, w8, w0
1419-
; CHECK-GI-NEXT: and w0, w8, #0xffff
1420-
; CHECK-GI-NEXT: ret
1372+
; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
1373+
; CHECK: // %bb.0: // %entry
1374+
; CHECK-NEXT: uaddlv h0, v0.8b
1375+
; CHECK-NEXT: fmov w8, s0
1376+
; CHECK-NEXT: add w8, w8, w0
1377+
; CHECK-NEXT: and w0, w8, #0xffff
1378+
; CHECK-NEXT: ret
14211379
entry:
14221380
%xx = zext <8 x i8> %x to <8 x i16>
14231381
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
@@ -1426,22 +1384,13 @@ entry:
14261384
}
14271385

14281386
define signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) {
1429-
; CHECK-SD-LABEL: add_v8i8_v8i16_acc_sext:
1430-
; CHECK-SD: // %bb.0: // %entry
1431-
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
1432-
; CHECK-SD-NEXT: addv h0, v0.8h
1433-
; CHECK-SD-NEXT: fmov w8, s0
1434-
; CHECK-SD-NEXT: add w8, w8, w0
1435-
; CHECK-SD-NEXT: sxth w0, w8
1436-
; CHECK-SD-NEXT: ret
1437-
;
1438-
; CHECK-GI-LABEL: add_v8i8_v8i16_acc_sext:
1439-
; CHECK-GI: // %bb.0: // %entry
1440-
; CHECK-GI-NEXT: saddlv h0, v0.8b
1441-
; CHECK-GI-NEXT: fmov w8, s0
1442-
; CHECK-GI-NEXT: add w8, w8, w0
1443-
; CHECK-GI-NEXT: sxth w0, w8
1444-
; CHECK-GI-NEXT: ret
1387+
; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
1388+
; CHECK: // %bb.0: // %entry
1389+
; CHECK-NEXT: saddlv h0, v0.8b
1390+
; CHECK-NEXT: fmov w8, s0
1391+
; CHECK-NEXT: add w8, w8, w0
1392+
; CHECK-NEXT: sxth w0, w8
1393+
; CHECK-NEXT: ret
14451394
entry:
14461395
%xx = sext <8 x i8> %x to <8 x i16>
14471396
%z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)

0 commit comments

Comments
 (0)