Commit 49dfa80
[AArch64][GlobalISel] Combine Vector Reduction Add Long
ADDLV(ADDLP) => ADDLV: removes the unnecessary ADDLP instruction.
ADDV(ADDLP) => ADDLV: this combine already exists for SDAG; this adds it for GlobalISel.
1 parent c933054 commit 49dfa80
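For reference, a small standalone C++ snippet (illustrative, not part of the commit; the function name is invented) that produces the ADDV(ADDLP) shape this combine targets: vpaddlq_u8 lowers to a pairwise long add (uaddlp) and vaddvq_u16 to an across-vector add, which can now be selected as a single uaddlv.

#include <arm_neon.h>

// ADDV(UADDLP(x)): a pairwise long add followed by an across-vector add.
// With this combine, the pair is selected as one UADDLV instruction.
uint16_t sum_bytes(uint8x16_t v) {
  return vaddvq_u16(vpaddlq_u8(v));
}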

9 files changed: +268, -241 lines

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 15 additions & 0 deletions
@@ -227,6 +227,18 @@ def G_SMULL : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+def G_UADDLP : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
+def G_SADDLP : AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = 0;
+}
+
 def G_UADDLV : AArch64GenericInstruction {
   let OutOperandList = (outs type0:$dst);
   let InOperandList = (ins type0:$src1);
@@ -294,6 +306,9 @@ def : GINodeEquiv<G_BSP, AArch64bsp>;
 def : GINodeEquiv<G_UMULL, AArch64umull>;
 def : GINodeEquiv<G_SMULL, AArch64smull>;
 
+def : GINodeEquiv<G_SADDLP, AArch64saddlp_n>;
+def : GINodeEquiv<G_UADDLP, AArch64uaddlp_n>;
+
 def : GINodeEquiv<G_SADDLV, AArch64saddlv>;
 def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 23 additions & 0 deletions
@@ -6664,6 +6664,26 @@ multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp>
 defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
 defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
 
+// Patterns used by GlobalISel.
+multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
+  // Patterns for addv(addlp(x)) ==> addlv
+  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
+            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
+  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
+            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
+  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
+            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;
+
+  // Patterns for addp(addlp(x)) ==> addlv
+  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
+            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
+  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
+            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
+}
+
+defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
+defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;
+
 // Patterns for uaddlv(uaddlp(x)) ==> uaddlv
 def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
           (i64 (EXTRACT_SUBREG
@@ -6675,6 +6695,9 @@ def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op)))
           (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
           ssub))>;
 
+def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
+          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;
+
 def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
           (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;
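The new v2i64 pattern above matches the vector-typed G_UADDLV that the GlobalISel legalizer now emits (see the legalizer change below). A hedged source-level example of the uaddlv(uaddlp(x)) shape it selects (function name invented, not from the commit):

#include <arm_neon.h>

// vpaddlq_u16 maps to uaddlp (v8i16 -> v4i32) and vaddlvq_u32 to uaddlv
// over v4i32; the added pattern folds the pair into a single UADDLVv8i16v.
uint64_t sum_halves(uint16x8_t v) {
  return vaddlvq_u32(vpaddlq_u16(v));
}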

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 52 additions & 0 deletions
@@ -1445,6 +1445,58 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
 
     return true;
   }
+  case Intrinsic::aarch64_neon_uaddlp:
+  case Intrinsic::aarch64_neon_saddlp: {
+    MachineIRBuilder MIB(MI);
+    MachineRegisterInfo &MRI = *MIB.getMRI();
+
+    unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
+                       ? AArch64::G_UADDLP
+                       : AArch64::G_SADDLP;
+    MIB.buildInstr(Opc, {MI.getOperand(0)}, {MI.getOperand(2)});
+    MI.eraseFromParent();
+
+    return true;
+  }
+  case Intrinsic::aarch64_neon_uaddlv:
+  case Intrinsic::aarch64_neon_saddlv: {
+    MachineIRBuilder MIB(MI);
+    MachineRegisterInfo &MRI = *MIB.getMRI();
+
+    unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
+                       ? AArch64::G_UADDLV
+                       : AArch64::G_SADDLV;
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(2).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+
+    LLT MidTy, ExtTy;
+    if (DstTy.isScalar() && DstTy.getScalarSizeInBits() <= 32) {
+      MidTy = LLT::fixed_vector(4, 32);
+      ExtTy = LLT::scalar(32);
+    } else {
+      MidTy = LLT::fixed_vector(2, 64);
+      ExtTy = LLT::scalar(64);
+    }
+
+    Register MidReg =
+        MIB.buildInstr(Opc, {MidTy}, {SrcReg})->getOperand(0).getReg();
+    Register ZeroReg =
+        MIB.buildConstant(LLT::scalar(64), 0)->getOperand(0).getReg();
+    Register ExtReg = MIB.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {ExtTy},
+                                     {MidReg, ZeroReg})
+                          ->getOperand(0)
+                          .getReg();
+
+    if (DstTy.getScalarSizeInBits() < 32)
+      MIB.buildTrunc(DstReg, ExtReg);
+    else
+      MIB.buildCopy(DstReg, ExtReg);
+
+    MI.eraseFromParent();
+
+    return true;
+  }
   case Intrinsic::aarch64_neon_smax:
   case Intrinsic::aarch64_neon_smin:
   case Intrinsic::aarch64_neon_umax:
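To make the uaddlv/saddlv path concrete: a scalar-result across-lane intrinsic reaches this code with an s64 or narrower destination, the vector-typed G_UADDLV/G_SADDLV is built with the <4 x s32> or <2 x s64> mid-type, and lane 0 is extracted and copied (or truncated) into the original destination. A minimal sketch of a caller that takes the <2 x s64> branch (illustrative, not from the commit):

#include <arm_neon.h>

// The i64 result takes the else-branch above: MidTy = <2 x s64>,
// ExtTy = s64, then G_EXTRACT_VECTOR_ELT at lane 0 and a copy.
int64_t long_sum(int32x4_t v) {
  return vaddlvq_s32(v);
}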

llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir

Lines changed: 68 additions & 48 deletions
@@ -69,7 +69,10 @@ body: |
     ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %copy(s32)
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[ZEXT]](s64)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: %ctpop:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: %ctpop:_(s32) = COPY [[EVEC]](s32)
     ; CHECK-NEXT: $w0 = COPY %ctpop(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ;
@@ -98,8 +101,11 @@ body: |
     ; CHECK-NEXT: %copy:_(s64) = COPY $x0
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST %copy(s64)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: %ctpop:_(s64) = G_ZEXT [[INT]](s32)
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: %ctpop:_(s64) = G_ZEXT [[COPY]](s32)
     ; CHECK-NEXT: $x0 = COPY %ctpop(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     ;
@@ -131,12 +137,14 @@ body: |
     ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64)
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[MV]](s128)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>)
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[C1]](s32)
     ; CHECK-NEXT: $x0 = COPY [[MV1]](s64)
-    ; CHECK-NEXT: $x1 = COPY [[C1]](s64)
+    ; CHECK-NEXT: $x1 = COPY [[C]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
     ;
     ; CHECK-CSSC-LABEL: name: s128_lower
@@ -177,9 +185,12 @@ body: |
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ;
     ; CHECK-CSSC-LABEL: name: widen_s16
@@ -216,9 +227,12 @@ body: |
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ;
     ; CHECK-CSSC-LABEL: name: widen_s8
@@ -255,9 +269,12 @@ body: |
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ;
     ; CHECK-CSSC-LABEL: name: widen_s3
@@ -293,9 +310,12 @@ body: |
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]]
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: [[UADDLV:%[0-9]+]]:_(<4 x s32>) = G_UADDLV [[CTPOP]]
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[UADDLV]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     ;
     ; CHECK-CSSC-LABEL: name: different_sizes
@@ -329,8 +349,8 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK-NEXT: $q0 = COPY [[INT]](<8 x s16>)
+    ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-NEXT: $q0 = COPY [[UADDLP]](<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     ;
     ; CHECK-CSSC-LABEL: name: custom_8x16
@@ -339,8 +359,8 @@ body: |
     ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
     ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>)
     ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK-CSSC-NEXT: $q0 = COPY [[INT]](<8 x s16>)
+    ; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-CSSC-NEXT: $q0 = COPY [[UADDLP]](<8 x s16>)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %0:_(<8 x s16>) = COPY $q0
     %1:_(<8 x s16>) = G_CTPOP %0(<8 x s16>)
@@ -361,9 +381,9 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
-    ; CHECK-NEXT: $q0 = COPY [[INT1]](<4 x s32>)
+    ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
+    ; CHECK-NEXT: $q0 = COPY [[UADDLP1]](<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     ;
     ; CHECK-CSSC-LABEL: name: custom_4x32
@@ -372,9 +392,9 @@ body: |
     ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
     ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>)
     ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
-    ; CHECK-CSSC-NEXT: $q0 = COPY [[INT1]](<4 x s32>)
+    ; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-CSSC-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
+    ; CHECK-CSSC-NEXT: $q0 = COPY [[UADDLP1]](<4 x s32>)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %0:_(<4 x s32>) = COPY $q0
     %1:_(<4 x s32>) = G_CTPOP %0(<4 x s32>)
@@ -395,10 +415,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
-    ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
-    ; CHECK-NEXT: $q0 = COPY [[INT2]](<2 x s64>)
+    ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
+    ; CHECK-NEXT: [[UADDLP2:%[0-9]+]]:_(<2 x s64>) = G_UADDLP [[UADDLP1]]
+    ; CHECK-NEXT: $q0 = COPY [[UADDLP2]](<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     ;
     ; CHECK-CSSC-LABEL: name: custom_2x64
@@ -407,10 +427,10 @@ body: |
     ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
     ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>)
     ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>)
-    ; CHECK-CSSC-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>)
-    ; CHECK-CSSC-NEXT: $q0 = COPY [[INT2]](<2 x s64>)
+    ; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<8 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-CSSC-NEXT: [[UADDLP1:%[0-9]+]]:_(<4 x s32>) = G_UADDLP [[UADDLP]]
+    ; CHECK-CSSC-NEXT: [[UADDLP2:%[0-9]+]]:_(<2 x s64>) = G_UADDLP [[UADDLP1]]
+    ; CHECK-CSSC-NEXT: $q0 = COPY [[UADDLP2]](<2 x s64>)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0
     %0:_(<2 x s64>) = COPY $q0
     %1:_(<2 x s64>) = G_CTPOP %0(<2 x s64>)
@@ -431,8 +451,8 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: $d0 = COPY [[INT]](<4 x s16>)
+    ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-NEXT: $d0 = COPY [[UADDLP]](<4 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     ;
     ; CHECK-CSSC-LABEL: name: custom_4x16
@@ -441,8 +461,8 @@ body: |
     ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
     ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>)
     ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
-    ; CHECK-CSSC-NEXT: $d0 = COPY [[INT]](<4 x s16>)
+    ; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-CSSC-NEXT: $d0 = COPY [[UADDLP]](<4 x s16>)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
     %0:_(<4 x s16>) = COPY $d0
     %1:_(<4 x s16>) = G_CTPOP %0(<4 x s16>)
@@ -463,9 +483,9 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
     ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
     ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
-    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
-    ; CHECK-NEXT: $d0 = COPY [[INT1]](<2 x s32>)
+    ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-NEXT: [[UADDLP1:%[0-9]+]]:_(<2 x s32>) = G_UADDLP [[UADDLP]]
+    ; CHECK-NEXT: $d0 = COPY [[UADDLP1]](<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     ;
     ; CHECK-CSSC-LABEL: name: custom_2x32
@@ -474,9 +494,9 @@ body: |
     ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
     ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>)
     ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>)
-    ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>)
-    ; CHECK-CSSC-NEXT: $d0 = COPY [[INT1]](<2 x s32>)
+    ; CHECK-CSSC-NEXT: [[UADDLP:%[0-9]+]]:_(<4 x s16>) = G_UADDLP [[CTPOP]]
+    ; CHECK-CSSC-NEXT: [[UADDLP1:%[0-9]+]]:_(<2 x s32>) = G_UADDLP [[UADDLP]]
+    ; CHECK-CSSC-NEXT: $d0 = COPY [[UADDLP1]](<2 x s32>)
     ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = G_CTPOP %0(<2 x s32>)
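These test updates all stem from CTPOP legalization, which sums per-byte popcounts with the uaddlv/uaddlp intrinsics that are now legalized to G_UADDLV/G_UADDLP. A rough C++ equivalent of the 64-bit scalar case (illustrative names, not from the commit):

#include <arm_neon.h>

// Popcount via NEON: vcnt_u8 gives per-byte counts (G_CTPOP on <8 x s8>),
// and vaddlv_u8 sums them across the vector, which after this change is
// modeled as G_UADDLV plus a lane-0 extract.
uint32_t popcount_u64(uint64_t x) {
  return vaddlv_u8(vcnt_u8(vcreate_u8(x)));
}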
