Skip to content

Commit 228c740

Browse files
committed
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary: This patch makes better use of multiclasses, which helped find some existing bugs in the predicated VMULL* intrinsics; these are now fixed. The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered to have an argument ("inactive") with an incorrect type, and this required a fix that is included in this patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing, and to thwart further gremlins, the unit tests are improved in scope. The *.ll tests are all generated by a small bit of throw-away scripting from the corresponding *.c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421
1 parent 9bace26 commit 228c740

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+2742
-339
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 51 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -135,91 +135,70 @@ def vabdq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
135135
(IRInt<"vabd", [Vector]> $a, $b)>;
136136
}
137137

138+
multiclass VectorVectorArithmetic<string operation> {
139+
defm "" : IntrinsicMX<Vector, (args Vector:$a, Vector:$b,
140+
Predicate:$pred),
141+
(IRInt<operation, [Vector, Predicate]> $a, $b,
142+
$pred, $inactive)>;
143+
}
144+
145+
multiclass VectorVectorArithmeticBitcast<string operation> {
146+
defm "" : IntrinsicMX<Vector, (args Vector:$a, Vector:$b,
147+
Predicate:$pred),
148+
(bitcast (IRInt<operation, [UVector, Predicate]>
149+
(bitcast $a, UVector),
150+
(bitcast $b, UVector),
151+
$pred,
152+
(bitcast $inactive, UVector)), Vector)>;
153+
}
154+
138155
// Predicated intrinsics
139156
let params = T.Usual in {
140-
def vabdq_m: Intrinsic<
141-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
142-
(IRInt<"abd_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
143-
def vaddq_m: Intrinsic<
144-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
145-
(IRInt<"add_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
146-
def vsubq_m: Intrinsic<
147-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
148-
(IRInt<"sub_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
149-
def vmulq_m: Intrinsic<
150-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
151-
(IRInt<"mul_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
152-
defm vandq_m: predicated_bit_op_fp<"and_predicated">;
153-
defm vbicq_m: predicated_bit_op_fp<"bic_predicated">;
154-
defm veorq_m: predicated_bit_op_fp<"eor_predicated">;
155-
defm vornq_m: predicated_bit_op_fp<"orn_predicated">;
156-
defm vorrq_m: predicated_bit_op_fp<"orr_predicated">;
157+
defm vabdq : VectorVectorArithmetic<"abd_predicated">;
158+
defm vaddq : VectorVectorArithmetic<"add_predicated">;
159+
defm vsubq : VectorVectorArithmetic<"sub_predicated">;
160+
defm vmulq : VectorVectorArithmetic<"mul_predicated">;
161+
defm vandq : VectorVectorArithmeticBitcast<"and_predicated">;
162+
defm vbicq : VectorVectorArithmeticBitcast<"bic_predicated">;
163+
defm veorq : VectorVectorArithmeticBitcast<"eor_predicated">;
164+
defm vornq : VectorVectorArithmeticBitcast<"orn_predicated">;
165+
defm vorrq : VectorVectorArithmeticBitcast<"orr_predicated">;
166+
}
167+
168+
multiclass DblVectorVectorArithmetic<string operation, dag top> {
169+
defm "" : IntrinsicMX<DblVector, (args Vector:$a, Vector:$b,
170+
Predicate:$pred),
171+
(IRInt<operation,
172+
[DblVector, Vector, Predicate]>
173+
$a, $b, top, $pred, $inactive)>;
157174
}
158175

159176
// Predicated intrinsics - Int types only
160177
let params = T.Int in {
161-
def vminq_m: Intrinsic<
162-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
163-
(IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
164-
def vmaxq_m: Intrinsic<
165-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
166-
(IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
167-
def vmulhq_m: Intrinsic<
168-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
169-
(IRInt<"mulh_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
170-
def vrmulhq_m: Intrinsic<
171-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
172-
(IRInt<"rmulh_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
173-
def vqdmulhq_m: Intrinsic<
174-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
175-
(IRInt<"qdmulh_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
176-
def vqrdmulhq_m: Intrinsic<
177-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
178-
(IRInt<"qrdmulh_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
179-
def vmullbq_int_m: Intrinsic<
180-
DblVector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
181-
(IRInt<"mull_int_predicated", [DblVector, Vector, Predicate]> $a, $b, 0,
182-
$pred, $inactive)>;
183-
def vmulltq_int_m: Intrinsic<
184-
DblVector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
185-
(IRInt<"mull_int_predicated", [DblVector, Vector, Predicate]> $a, $b, 1,
186-
$pred, $inactive)>;
187-
def vqaddq_m: Intrinsic<
188-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
189-
(IRInt<"qadd_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
190-
def vhaddq_m: Intrinsic<
191-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
192-
(IRInt<"hadd_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
193-
def vrhaddq_m: Intrinsic<
194-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
195-
(IRInt<"rhadd_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
196-
def vqsubq_m: Intrinsic<
197-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
198-
(IRInt<"qsub_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
199-
def vhsubq_m: Intrinsic<
200-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
201-
(IRInt<"hsub_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
178+
defm vminq : VectorVectorArithmetic<"min_predicated">;
179+
defm vmaxq : VectorVectorArithmetic<"max_predicated">;
180+
defm vmulhq : VectorVectorArithmetic<"mulh_predicated">;
181+
defm vrmulhq : VectorVectorArithmetic<"rmulh_predicated">;
182+
defm vqdmulhq : VectorVectorArithmetic<"qdmulh_predicated">;
183+
defm vqrdmulhq : VectorVectorArithmetic<"qrdmulh_predicated">;
184+
defm vqaddq : VectorVectorArithmetic<"qadd_predicated">;
185+
defm vhaddq : VectorVectorArithmetic<"hadd_predicated">;
186+
defm vrhaddq : VectorVectorArithmetic<"rhadd_predicated">;
187+
defm vqsubq : VectorVectorArithmetic<"qsub_predicated">;
188+
defm vhsubq : VectorVectorArithmetic<"hsub_predicated">;
189+
defm vmullbq_int : DblVectorVectorArithmetic<"mull_int_predicated", (u32 0)>;
190+
defm vmulltq_int : DblVectorVectorArithmetic<"mull_int_predicated", (u32 1)>;
202191
}
203192

204193
let params = T.Poly, overrideKindLetter = "p" in {
205-
def vmullbq_poly_m: Intrinsic<
206-
DblVector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
207-
(IRInt<"mull_poly_predicated", [DblVector, Vector, Predicate]> $a, $b, 0,
208-
$pred, $inactive)>;
209-
def vmulltq_poly_m: Intrinsic<
210-
DblVector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
211-
(IRInt<"mull_poly_predicated", [DblVector, Vector, Predicate]> $a, $b, 1,
212-
$pred, $inactive)>;
194+
defm vmullbq_poly : DblVectorVectorArithmetic<"mull_poly_predicated", (u32 0)>;
195+
defm vmulltq_poly : DblVectorVectorArithmetic<"mull_poly_predicated", (u32 1)>;
213196
}
214197

215198
// Predicated intrinsics - Float types only
216199
let params = T.Float in {
217-
def vminnmq_m: Intrinsic<
218-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
219-
(IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
220-
def vmaxnmq_m: Intrinsic<
221-
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
222-
(IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
200+
defm vminnmq : VectorVectorArithmetic<"min_predicated">;
201+
defm vmaxnmq : VectorVectorArithmetic<"max_predicated">;
223202
}
224203

225204
let params = T.Int in {

clang/test/CodeGen/arm-mve-intrinsics/vabdq.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,51 @@ float32x4_t test_vabdq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b,
9393
return vabdq_m_f32(inactive, a, b, p);
9494
#endif /* POLYMORPHIC */
9595
}
96+
97+
// CHECK-LABEL: @test_vabdq_x_u16(
98+
// CHECK-NEXT: entry:
99+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
100+
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
101+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
102+
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
103+
//
104+
uint16x8_t test_vabdq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
105+
{
106+
#ifdef POLYMORPHIC
107+
return vabdq_x(a, b, p);
108+
#else /* POLYMORPHIC */
109+
return vabdq_x_u16(a, b, p);
110+
#endif /* POLYMORPHIC */
111+
}
112+
113+
// CHECK-LABEL: @test_vabdq_x_u32(
114+
// CHECK-NEXT: entry:
115+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
116+
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
117+
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef)
118+
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
119+
//
120+
uint32x4_t test_vabdq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
121+
{
122+
#ifdef POLYMORPHIC
123+
return vabdq_x(a, b, p);
124+
#else /* POLYMORPHIC */
125+
return vabdq_x_u32(a, b, p);
126+
#endif /* POLYMORPHIC */
127+
}
128+
129+
// CHECK-LABEL: @test_vabdq_x_f16(
130+
// CHECK-NEXT: entry:
131+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
132+
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
133+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
134+
// CHECK-NEXT: ret <8 x half> [[TMP2]]
135+
//
136+
float16x8_t test_vabdq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
137+
{
138+
#ifdef POLYMORPHIC
139+
return vabdq_x(a, b, p);
140+
#else /* POLYMORPHIC */
141+
return vabdq_x_f16(a, b, p);
142+
#endif /* POLYMORPHIC */
143+
}

clang/test/CodeGen/arm-mve-intrinsics/vaddq.c

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@ uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b)
1818
#endif /* POLYMORPHIC */
1919
}
2020

21-
// CHECK-LABEL: @test_vsubq_f16(
21+
// CHECK-LABEL: @test_vaddq_f16(
2222
// CHECK-NEXT: entry:
23-
// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]]
23+
// CHECK-NEXT: [[TMP0:%.*]] = fadd <8 x half> [[A:%.*]], [[B:%.*]]
2424
// CHECK-NEXT: ret <8 x half> [[TMP0]]
2525
//
26-
float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b)
26+
float16x8_t test_vaddq_f16(float16x8_t a, float16x8_t b)
2727
{
2828
#ifdef POLYMORPHIC
29-
return vsubq(a, b);
29+
return vaddq(a, b);
3030
#else /* POLYMORPHIC */
31-
return vsubq_f16(a, b);
31+
return vaddq_f16(a, b);
3232
#endif /* POLYMORPHIC */
3333
}
3434

@@ -48,18 +48,50 @@ int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred
4848
#endif /* POLYMORPHIC */
4949
}
5050

51-
// CHECK-LABEL: @test_vsubq_m_f32(
51+
// CHECK-LABEL: @test_vaddq_m_f32(
5252
// CHECK-NEXT: entry:
5353
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
5454
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
55-
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
55+
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
5656
// CHECK-NEXT: ret <4 x float> [[TMP2]]
5757
//
58-
float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
58+
float32x4_t test_vaddq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
59+
{
60+
#ifdef POLYMORPHIC
61+
return vaddq_m(inactive, a, b, p);
62+
#else /* POLYMORPHIC */
63+
return vaddq_m_f32(inactive, a, b, p);
64+
#endif /* POLYMORPHIC */
65+
}
66+
67+
// CHECK-LABEL: @test_vaddq_x_u16(
68+
// CHECK-NEXT: entry:
69+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
70+
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
71+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef)
72+
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
73+
//
74+
uint16x8_t test_vaddq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
75+
{
76+
#ifdef POLYMORPHIC
77+
return vaddq_x(a, b, p);
78+
#else /* POLYMORPHIC */
79+
return vaddq_x_u16(a, b, p);
80+
#endif /* POLYMORPHIC */
81+
}
82+
83+
// CHECK-LABEL: @test_vaddq_x_f16(
84+
// CHECK-NEXT: entry:
85+
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
86+
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
87+
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.add.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> undef)
88+
// CHECK-NEXT: ret <8 x half> [[TMP2]]
89+
//
90+
float16x8_t test_vaddq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
5991
{
6092
#ifdef POLYMORPHIC
61-
return vsubq_m(inactive, a, b, p);
93+
return vaddq_x(a, b, p);
6294
#else /* POLYMORPHIC */
63-
return vsubq_m_f32(inactive, a, b, p);
95+
return vaddq_x_f16(a, b, p);
6496
#endif /* POLYMORPHIC */
6597
}

0 commit comments

Comments
 (0)