@@ -6630,60 +6630,43 @@ defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
6630
6630
// CodeGen patterns for addhn and subhn instructions, which can actually be
6631
6631
// written in LLVM IR without too much difficulty.
6632
6632
6633
- // Prioritize ADDHN and SUBHN over UZP2.
6634
- let AddedComplexity = 10 in {
6635
-
6636
- // ADDHN
6637
- def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
6638
- (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6639
- def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6640
- (i32 16))))),
6641
- (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6642
- def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6643
- (i32 32))))),
6644
- (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6645
- def : Pat<(concat_vectors (v8i8 V64:$Rd),
6646
- (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6647
- (i32 8))))),
6648
- (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6649
- V128:$Rn, V128:$Rm)>;
6650
- def : Pat<(concat_vectors (v4i16 V64:$Rd),
6651
- (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6652
- (i32 16))))),
6653
- (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6654
- V128:$Rn, V128:$Rm)>;
6655
- def : Pat<(concat_vectors (v2i32 V64:$Rd),
6656
- (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6657
- (i32 32))))),
6658
- (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6659
- V128:$Rn, V128:$Rm)>;
6660
-
6661
- // SUBHN
6662
- def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
6663
- (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
6664
- def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6665
- (i32 16))))),
6666
- (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
6667
- def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6668
- (i32 32))))),
6669
- (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
6670
- def : Pat<(concat_vectors (v8i8 V64:$Rd),
6671
- (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6672
- (i32 8))))),
6673
- (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6674
- V128:$Rn, V128:$Rm)>;
6675
- def : Pat<(concat_vectors (v4i16 V64:$Rd),
6676
- (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6677
- (i32 16))))),
6678
- (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6679
- V128:$Rn, V128:$Rm)>;
6680
- def : Pat<(concat_vectors (v2i32 V64:$Rd),
6681
- (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6682
- (i32 32))))),
6683
- (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
6684
- V128:$Rn, V128:$Rm)>;
6685
-
6686
- } // AddedComplexity = 10
6633
// AddSubHNPatterns: selection patterns that map a truncated logical right
// shift of a vector add/sub (or xor with all-ones) onto the given
// ADDHN/SUBHN instructions.
//   ADDHN,  SUBHN  - instructions selected for the plain truncated result.
//   ADDHN2, SUBHN2 - instructions selected when the truncated result is the
//                    second operand of a concat_vectors whose first operand
//                    is $Rd; $Rd is passed wrapped in SUBREG_TO_REG ... dsub.
//   VT64           - type of the truncated result (and of $Rd).
//   VT128          - type of the value that is shifted and then truncated.
//   Shift          - i32 amount of the AArch64vlshr applied before the trunc.
+ multiclass AddSubHNPatterns<Instruction ADDHN, Instruction ADDHN2, Instruction SUBHN,
6634
+ Instruction SUBHN2, ValueType VT64, ValueType VT128, int Shift> {
6635
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 Shift))))),
6636
+ (ADDHN V128:$Rn, V128:$Rm)>;
6637
// Only the concat_vectors form gets AddedComplexity = 10.
// NOTE(review): the code this replaces described the bump as prioritizing
// ADDHN/SUBHN over UZP2 - confirm that is still the intent here.
+ let AddedComplexity = 10 in
6638
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
6639
+ (trunc (VT128 (AArch64vlshr (add V128:$Rn, V128:$Rm),
6640
+ (i32 Shift))))),
6641
+ (ADDHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
6642
+
6643
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 Shift))))),
6644
+ (SUBHN V128:$Rn, V128:$Rm)>;
6645
+ let AddedComplexity = 10 in
6646
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
6647
+ (trunc (VT128 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
6648
+ (i32 Shift))))),
6649
+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>;
6650
+
6651
+ // xor by -1 can also be treated as sub: ~Rn == (-1) - Rn. The matched all-ones vector is bound to $Rm, so it is passed first (minuend) and $Rn second.
6652
+ def : Pat<(VT64 (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm), (i32 Shift))))),
6653
+ (SUBHN V128:$Rm, V128:$Rn)>;
6654
+ let AddedComplexity = 10 in
6655
+ def : Pat<(concat_vectors (VT64 V64:$Rd),
6656
+ (trunc (VT128 (AArch64vlshr (xor V128:$Rn, immAllOnesV:$Rm),
6657
+ (i32 Shift))))),
6658
+ (SUBHN2 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rm, V128:$Rn)>;
6659
+ }
6660
+
6661
// Instantiate the patterns once per source vector type. In each case the
// shift amount equals the element width of the narrowed result:
//   v8i16 -> v8i8  (shift 8), v4i32 -> v4i16 (shift 16),
//   v2i64 -> v2i32 (shift 32).
+ defm : AddSubHNPatterns<ADDHNv8i16_v8i8, ADDHNv8i16_v16i8,
6662
+ SUBHNv8i16_v8i8, SUBHNv8i16_v16i8,
6663
+ v8i8, v8i16, 8>;
6664
+ defm : AddSubHNPatterns<ADDHNv4i32_v4i16, ADDHNv4i32_v8i16,
6665
+ SUBHNv4i32_v4i16, SUBHNv4i32_v8i16,
6666
+ v4i16, v4i32, 16>;
6667
+ defm : AddSubHNPatterns<ADDHNv2i64_v2i32, ADDHNv2i64_v4i32,
6668
+ SUBHNv2i64_v2i32, SUBHNv2i64_v4i32,
6669
+ v2i32, v2i64, 32>;
6687
6670
6688
6671
//----------------------------------------------------------------------------
6689
6672
// AdvSIMD bitwise extract from vector instruction.
0 commit comments