Skip to content

Commit f6c5839

Browse files
committed
Remove nxv4i64 case
1 parent 00a1be2 commit f6c5839

File tree

2 files changed: 23 additions and 126 deletions

2 files changed: 23 additions and 126 deletions

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 23 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -21795,35 +21795,6 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
21795 21795
if (A.getValueType() != B.getValueType())
21796 21796
return SDValue();
21797 21797

21798-
// The fully-reduced type. Should be a vector of i32 or i64
21799-
EVT FullType = N->getValueType(0);
21800-
// The type that is extended to the wide type. Should be an i8 or i16
21801-
EVT ExtendedType = A.getValueType();
21802-
// The wide type with four times as many elements as the reduced type. Should
21803-
// be a vector of i32 or i64, the same as the fully-reduced type
21804-
EVT WideType = MulOp.getValueType();
21805-
if (WideType.getScalarSizeInBits() != FullType.getScalarSizeInBits())
21806-
return SDValue();
21807-
// Dot products operate on chunks of four elements so there must be four times
21808-
// as many elements in the wide type
21809-
if (WideType.getVectorMinNumElements() / FullType.getVectorMinNumElements() !=
21810-
4)
21811-
return SDValue();
21812-
switch (FullType.getScalarSizeInBits()) {
21813-
case 32:
21814-
if (ExtendedType.getScalarSizeInBits() != 8)
21815-
return SDValue();
21816-
break;
21817-
case 64:
21818-
// i8 to i64 can be done with an extended i32 dot product
21819-
if (ExtendedType.getScalarSizeInBits() != 8 &&
21820-
ExtendedType.getScalarSizeInBits() != 16)
21821-
return SDValue();
21822-
break;
21823-
default:
21824-
return SDValue();
21825-
}
21826-
21827 21798
unsigned DotIntrinsicId = Intrinsic::not_intrinsic;
21828 21799

21829 21800
if (IsSExt)
@@ -21834,33 +21805,31 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
21834 21805
assert(DotIntrinsicId != Intrinsic::not_intrinsic &&
21835 21806
"Unexpected dot product case encountered.");
21836 21807

21837-
EVT Type = NarrowOp.getValueType();
21808+
auto IntrinsicId = DAG.getConstant(DotIntrinsicId, DL, MVT::i64);
21838 21809

21839-
// 8 bit input to 64 bit output can be done by doing a 32 bit dot product
21840-
// and extending the output
21841-
bool Extend = A->getValueType(0).getScalarSizeInBits() == 8 &&
21842-
Type.getScalarSizeInBits() == 64;
21843-
SDValue Accumulator = NarrowOp;
21844-
if (Extend) {
21845-
Type =
21846-
Type.changeVectorElementType(EVT::getIntegerVT(*DAG.getContext(), 32));
21847-
// The accumulator is of the wider type so we insert a 0 accumulator and
21848-
// add the proper one after extending
21849-
Accumulator = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv4i32,
21850-
DAG.getConstant(0, DL, MVT::i32));
21851-
}
21810+
// The fully-reduced type. Should be a vector of i32 or i64
21811+
EVT ReducedType = N->getValueType(0);
21812+
// The type that is extended to the wide type. Should be an i8 or i16
21813+
EVT ExtendedType = A.getValueType();
21814+
// The wide type with four times as many elements as the reduced type. Should
21815+
// be a vector of i32 or i64, the same as the fully-reduced type
21816+
EVT WideType = MulOp.getValueType();
21817+
if (WideType.getScalarSizeInBits() != ReducedType.getScalarSizeInBits())
21818+
return SDValue();
21852 21819

21853-
auto IntrinsicId = DAG.getConstant(DotIntrinsicId, DL, MVT::i64);
21854-
auto DotProduct = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Type,
21855-
{IntrinsicId, Accumulator, A, B});
21856-
if (Extend) {
21857-
auto Extended = DAG.getNode(IsZExt ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND,
21858-
DL, NarrowOp.getValueType(), {DotProduct});
21859-
auto AccAdd = DAG.getNode(ISD::ADD, DL, NarrowOp.getValueType(),
21860-
{NarrowOp, Extended});
21861-
DotProduct = AccAdd;
21862-
}
21863-
return DotProduct;
21820+
// Dot products operate on chunks of four elements so there must be four times
21821+
// as many elements in the wide type
21822+
if (WideType == MVT::nxv16i32 && ReducedType == MVT::nxv4i32 &&
21823+
ExtendedType == MVT::nxv16i8)
21824+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv4i32,
21825+
{IntrinsicId, NarrowOp, A, B});
21826+
21827+
if (WideType == MVT::nxv8i64 && ReducedType == MVT::nxv2i64 &&
21828+
ExtendedType == MVT::nxv8i16)
21829+
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv2i64,
21830+
{IntrinsicId, NarrowOp, A, B});
21831+
21832+
return SDValue();
21864 21833
}
21865 21834

21866 21835
static SDValue performIntrinsicCombine(SDNode *N,

llvm/test/CodeGen/AArch64/partial-reduce-dot-product.ll

Lines changed: 0 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -61,78 +61,6 @@ entry:
61 61
ret <vscale x 2 x i64> %partial.reduce
62 62
}
63 63

64-
define <vscale x 4 x i64> @dotp_8to64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
65-
; CHECK-LABEL: dotp_8to64:
66-
; CHECK: // %bb.0: // %entry
67-
; CHECK-NEXT: mov z2.s, #0 // =0x0
68-
; CHECK-NEXT: udot z2.s, z0.b, z1.b
69-
; CHECK-NEXT: uunpklo z0.d, z2.s
70-
; CHECK-NEXT: uunpkhi z1.d, z2.s
71-
; CHECK-NEXT: ret
72-
entry:
73-
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
74-
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
75-
%mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
76-
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
77-
<vscale x 4 x i64> zeroinitializer, <vscale x 16 x i64> %mult)
78-
ret <vscale x 4 x i64> %partial.reduce
79-
}
80-
81-
define <vscale x 4 x i64> @dotp_sext_8to64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
82-
; CHECK-LABEL: dotp_sext_8to64:
83-
; CHECK: // %bb.0: // %entry
84-
; CHECK-NEXT: mov z2.s, #0 // =0x0
85-
; CHECK-NEXT: sdot z2.s, z0.b, z1.b
86-
; CHECK-NEXT: sunpklo z0.d, z2.s
87-
; CHECK-NEXT: sunpkhi z1.d, z2.s
88-
; CHECK-NEXT: ret
89-
entry:
90-
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
91-
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
92-
%mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
93-
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
94-
<vscale x 4 x i64> zeroinitializer, <vscale x 16 x i64> %mult)
95-
ret <vscale x 4 x i64> %partial.reduce
96-
}
97-
98-
define <vscale x 4 x i64> @dotp_8to64_accumulator(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 4 x i64> %acc) {
99-
; CHECK-LABEL: dotp_8to64_accumulator:
100-
; CHECK: // %bb.0: // %entry
101-
; CHECK-NEXT: mov z4.s, #0 // =0x0
102-
; CHECK-NEXT: udot z4.s, z0.b, z1.b
103-
; CHECK-NEXT: uunpklo z0.d, z4.s
104-
; CHECK-NEXT: uunpkhi z1.d, z4.s
105-
; CHECK-NEXT: add z0.d, z2.d, z0.d
106-
; CHECK-NEXT: add z1.d, z3.d, z1.d
107-
; CHECK-NEXT: ret
108-
entry:
109-
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
110-
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
111-
%mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
112-
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
113-
<vscale x 4 x i64> %acc, <vscale x 16 x i64> %mult)
114-
ret <vscale x 4 x i64> %partial.reduce
115-
}
116-
117-
define <vscale x 4 x i64> @dotp_sext_8to64_accumulator(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 4 x i64> %acc) {
118-
; CHECK-LABEL: dotp_sext_8to64_accumulator:
119-
; CHECK: // %bb.0: // %entry
120-
; CHECK-NEXT: mov z4.s, #0 // =0x0
121-
; CHECK-NEXT: sdot z4.s, z0.b, z1.b
122-
; CHECK-NEXT: sunpklo z0.d, z4.s
123-
; CHECK-NEXT: sunpkhi z1.d, z4.s
124-
; CHECK-NEXT: add z0.d, z2.d, z0.d
125-
; CHECK-NEXT: add z1.d, z3.d, z1.d
126-
; CHECK-NEXT: ret
127-
entry:
128-
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
129-
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>
130-
%mult = mul nuw nsw <vscale x 16 x i64> %a.wide, %b.wide
131-
%partial.reduce = tail call <vscale x 4 x i64> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(
132-
<vscale x 4 x i64> %acc, <vscale x 16 x i64> %mult)
133-
ret <vscale x 4 x i64> %partial.reduce
134-
}
135-
136 64
define <vscale x 4 x i32> @not_dotp(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
137 65
; CHECK-LABEL: not_dotp:
138 66
; CHECK: // %bb.0: // %entry

0 commit comments

Comments
 (0)