@@ -2222,64 +2222,6 @@ defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8, avgceilu>;
 defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16, avgceilu>;
 defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32, avgceilu>;
 
-// Rounding Halving Add perform the arithemtic operation with an extra bit of
-// precision, before performing the shift, to void clipping errors. We're not
-// modelling that here with these patterns, but we're using no wrap forms of
-// add to ensure that the extra bit of information is not needed for the
-// arithmetic or the rounding.
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvmovImm (i32 3585)))),
-                                (i32 1))),
-            (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvmovImm (i32 2049)))),
-                                (i32 1))),
-            (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvmovImm (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvmovImm (i32 3585)))),
-                                (i32 1))),
-            (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvmovImm (i32 2049)))),
-                                (i32 1))),
-            (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvmovImm (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
-
-  def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
-                                        (v16i8 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
-                                        (v8i16 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
-  def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
-                                        (v4i32 (ARMvdup (i32 1)))),
-                                (i32 1))),
-            (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
-}
-
-
 class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
                    bits<2> size, list<dag> pattern=[]>
   : MVE_int<iname, suffix, size, pattern> {
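Note on the patterns removed in this hunk: the ARMvmovImm immediates 3585 (0xE01), 2049 (0x801) and 1 are VMOV modified-immediate encodings of the form (cmode << 8) | value, and each splats the constant 1 across the i8, i16 and i32 lanes respectively (the second group of patterns matches the same splat written as ARMvdup (i32 1)). Every pattern is therefore matching "add the two operands plus one, then shift right by one". A minimal scalar C++ sketch of that rounding halving add, and of why the patterns only accepted the no-wrap addnsw/addnuw forms; the helper name vrhadd_s8 is illustrative, not an LLVM API:

#include <cstdint>
#include <cstdio>

// Reference for MVE vrhadd.s8: (a + b + 1) >> 1, computed with one extra
// bit of precision so the intermediate sum never clips. Widening to
// int32_t models that extra bit.
static int8_t vrhadd_s8(int8_t a, int8_t b) {
  return static_cast<int8_t>((int32_t(a) + int32_t(b) + 1) >> 1);
}

int main() {
  // 127 + 127 + 1 = 255 does not fit in i8: the wide intermediate yields
  // the correct 127, whereas 8-bit wrapping arithmetic would yield -1.
  // That is why the patterns only matched adds already known not to wrap.
  std::printf("vrhadd_s8(127, 127) = %d\n", vrhadd_s8(127, 127));
  return 0;
}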
@@ -2303,8 +2245,7 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size,
   : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
 
 multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op,
-                       SDPatternOperator unpred_op, Intrinsic PredInt, PatFrag add_op,
-                       SDNode shift_op> {
+                       SDPatternOperator unpred_op, Intrinsic PredInt> {
   def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
   defvar Inst = !cast<Instruction>(NAME);
   defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 VTI.Unsigned)), !cast<Instruction>(NAME)>;
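The dropped add_op/shift_op parameters existed so the multiclass could match a plain add-then-shift-right-by-one DAG. As a hedged illustration (whether a halving add is actually formed depends on the compiler narrowing the add and proving it cannot wrap), this is the kind of source loop that idiom typically comes from:

#include <cstdint>

// Average computed in 32 bits and truncated back to 16. If the add can be
// narrowed to i16 with a no-wrap guarantee (add nsw), the resulting
// add + ashr-by-1 pair is what the removed patterns turned into vhadd.s16.
void halving_add(const int16_t *a, const int16_t *b, int16_t *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = static_cast<int16_t>((int32_t(a[i]) + b[i]) >> 1);
}

int main() {
  int16_t a[4] = {100, -100, 32767, 5}, b[4] = {50, -50, 32767, 6}, out[4];
  halving_add(a, b, out, 4); // out = {75, -75, 32767, 5}
  return 0;
}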
@@ -2313,26 +2254,18 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, SDNode Op,
     // Unpredicated add-and-divide-by-two
     def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
               (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-
-    def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
-              (Inst MQPR:$Qm, MQPR:$Qn)>;
   }
 }
 
-multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op, PatFrag add_op, SDNode shift_op>
-  : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
-                shift_op>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI, SDNode Op>
+  : MVE_VHADD_m<VTI, Op, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
 
-// Halving add/sub perform the arithemtic operation with an extra bit of
-// precision, before performing the shift, to void clipping errors. We're not
-// modelling that here with these patterns, but we're using no wrap forms of
-// add/sub to ensure that the extra bit of information is not needed.
-defm MVE_VHADDs8  : MVE_VHADD<MVE_v16s8, avgfloors, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors, addnsw, ARMvshrsImm>;
-defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors, addnsw, ARMvshrsImm>;
-defm MVE_VHADDu8  : MVE_VHADD<MVE_v16u8, avgflooru, addnuw, ARMvshruImm>;
-defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru, addnuw, ARMvshruImm>;
-defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru, addnuw, ARMvshruImm>;
+defm MVE_VHADDs8  : MVE_VHADD<MVE_v16s8, avgfloors>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, avgfloors>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, avgfloors>;
+defm MVE_VHADDu8  : MVE_VHADD<MVE_v16u8, avgflooru>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, avgflooru>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, avgflooru>;
 
 multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
                        SDPatternOperator unpred_op, Intrinsic pred_int, PatFrag sub_op,
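With the hand-written patterns gone, VHADD and VRHADD are selected through the target-independent average nodes named in the defm lines: avgfloors/avgflooru here, and avgceilu (presumably avgceils for the signed variants) for VRHADD in the first hunk. A minimal sketch of their lane-wise semantics, shown for i8 with wider lanes analogous; the helper names mirror the node names but are not LLVM APIs:

#include <cstdint>
#include <cstdio>

// Floor average: (a + b) >> 1 in extended precision.
static int8_t  avgfloors(int8_t a, int8_t b)   { return int8_t((int32_t(a) + b) >> 1); }       // vhadd.s8
static uint8_t avgflooru(uint8_t a, uint8_t b) { return uint8_t((uint32_t(a) + b) >> 1); }     // vhadd.u8
// Ceil (rounding) average: (a + b + 1) >> 1 in extended precision.
static int8_t  avgceils(int8_t a, int8_t b)    { return int8_t((int32_t(a) + b + 1) >> 1); }   // vrhadd.s8
static uint8_t avgceilu(uint8_t a, uint8_t b)  { return uint8_t((uint32_t(a) + b + 1) >> 1); } // vrhadd.u8

int main() {
  // Floor rounds toward negative infinity, ceil toward positive infinity.
  std::printf("%d %d %d %d\n",
              avgfloors(-3, 2), avgflooru(3, 2),  // -1 2
              avgceils(-3, 2), avgceilu(3, 2));   //  0 3
  return 0;
}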