@@ -3679,27 +3679,28 @@ let Predicates = [IsLE, UseSTRQro] in {
3679
3679
// Match stores from lane 0 to the appropriate subreg's store.
// VecROStoreLane0Pat defines two anonymous patterns for a store (storeop) of
// element 0 of a VecTy vector held in VecListOne128:$Vt, where the extracted
// element has type STy:
//   * one for the ro.Wpat address form (GPR32 offset register), selecting STRW
//   * one for the ro.Xpat address form (GPR64 offset register), selecting STRX
// In both, the stored value is EXTRACT_SUBREG of $Vt at SubRegIdx. On the '+'
// lines that subregister extract is additionally wrapped in the SubRegTy value
// type; the '-' lines are the earlier form without that type.
3680
3680
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
3681
3681
ValueType VecTy, ValueType STy,
3682
+ ValueType SubRegTy,
3682
3683
SubRegIndex SubRegIdx,
3683
3684
Instruction STRW, Instruction STRX> {
3684
3685
// Register-offset store with a 32-bit offset register ($Rm in GPR32).
3685
3686
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
3686
3687
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3687
- (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3688
+ (STRW (SubRegTy ( EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx) ),
3688
3689
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3689
3690
// Register-offset store with a 64-bit offset register ($Rm in GPR64).
3690
3691
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
3691
3692
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3692
- (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3693
+ (STRX (SubRegTy ( EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx) ),
3693
3694
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3694
3695
}
3695
3696
3696
3697
// Instantiations of VecROStoreLane0Pat for the hsub, ssub and dsub
// subregisters. All patterns defined in this block carry AddedComplexity = 19.
// The '-' lines are the earlier six-argument form; the '+' lines pass the extra
// SubRegTy argument (f16 with hsub, i32 with ssub, i64 with dsub) ahead of the
// subregister index.
let AddedComplexity = 19 in {
3697
- defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
3698
- defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>;
3699
- defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>;
3700
- defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>;
3701
- defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>;
3702
- defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>;
3698
+ defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
3699
+ defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, f16, hsub, STRHroW, STRHroX>;
3700
+ defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, i32, ssub, STRSroW, STRSroX>;
3701
+ defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, i32, ssub, STRSroW, STRSroX>;
3702
+ defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, i64, dsub, STRDroW, STRDroX>;
3703
+ defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, i64, dsub, STRDroW, STRDroX>;
3703
3704
}
3704
3705
3705
3706
//---
@@ -3818,21 +3819,22 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
3818
3819
// Match stores from lane 0 to the appropriate subreg's store.
// VecStoreLane0Pat defines one anonymous pattern: a store (storeop) of element
// 0 of a VTy vector in VecListOne128:$Vt, with extracted type STy, to the
// address matched by UIAddrMode (base GPR64sp:$Rn plus IndexType:$offset). It
// selects STR on the SubRegIdx subregister of $Vt. On the '+' line the
// subregister extract is wrapped in the SubRegTy value type; the '-' line is
// the earlier untyped form.
3819
3820
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
3820
3821
ValueType VTy, ValueType STy,
3822
+ ValueType SubRegTy,
3821
3823
SubRegIndex SubRegIdx, Operand IndexType,
3822
3824
Instruction STR> {
3823
3825
def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
3824
3826
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
3825
- (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3827
+ (STR (SubRegTy ( EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx) ),
3826
3828
GPR64sp:$Rn, IndexType:$offset)>;
3827
3829
}
3828
3830
3829
3831
// Instantiations of VecStoreLane0Pat for the am_indexed16/32/64 address
// patterns with the uimm12s2/4/8 offset operands. All patterns defined in this
// block carry AddedComplexity = 19. The '-' lines are the earlier form; the
// '+' lines pass the extra SubRegTy argument (f16 with hsub, i32 with ssub,
// i64 with dsub) ahead of the subregister index.
let AddedComplexity = 19 in {
3830
- defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
3831
- defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>;
3832
- defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>;
3833
- defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>;
3834
- defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>;
3835
- defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>;
3832
+ defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
3833
+ defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
3834
+ defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
3835
+ defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
3836
+ defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
3837
+ defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
3836
3838
}
3837
3839
3838
3840
//---
@@ -3961,17 +3963,18 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
3961
3963
// Match stores from lane 0 to the appropriate subreg's store.
// VecStoreULane0Pat is a thin wrapper: it instantiates VecStoreLane0Pat with
// the address pattern fixed to am_unscaled128 and the offset operand fixed to
// simm9, forwarding StoreOp, VTy, STy, SubRegIdx and STR unchanged. The '+'
// lines add the SubRegTy parameter and forward it as well; the '-' line is the
// earlier form without it.
3962
3964
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
3963
3965
ValueType VTy, ValueType STy,
3966
+ ValueType SubRegTy,
3964
3967
SubRegIndex SubRegIdx, Instruction STR> {
3965
- defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
3968
+ defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
3966
3969
}
3967
3970
3968
3971
// Instantiations of VecStoreULane0Pat selecting STURHi, STURSi and STURDi.
// All patterns defined in this block carry AddedComplexity = 19. The '-' lines
// are the earlier form; the '+' lines pass the extra SubRegTy argument (f16
// with hsub, i32 with ssub, i64 with dsub) ahead of the subregister index.
let AddedComplexity = 19 in {
3969
- defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
3970
- defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>;
3971
- defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>;
3972
- defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>;
3973
- defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>;
3974
- defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>;
3972
+ defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
3973
+ defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
3974
+ defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
3975
+ defm : VecStoreULane0Pat<store, v4f32, f32, i32, ssub, STURSi>;
3976
+ defm : VecStoreULane0Pat<store, v2i64, i64, i64, dsub, STURDi>;
3977
+ defm : VecStoreULane0Pat<store, v2f64, f64, i64, dsub, STURDi>;
3975
3978
}
3976
3979
3977
3980
//---
@@ -4496,7 +4499,7 @@ multiclass FMULScalarFromIndexedLane0Patterns<string inst,
4496
4499
// f16 case: OpNode applied to FPR16:$Rn and element 0 of the v8f16 vector in
// V128:$Rm. Selects the instruction named by inst # inst_f16_suffix, passing
// the hsub subregister of $Rm as the second operand. On the '+' line that
// subregister extract is given an explicit f16 type; the '-' line is the
// earlier untyped form.
def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
4497
4500
(f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
4498
4501
(!cast<Instruction>(inst # inst_f16_suffix)
4499
- FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
4502
+ FPR16:$Rn, (f16 ( EXTRACT_SUBREG V128:$Rm, hsub) ))>;
4500
4503
}
4501
4504
let Predicates = preds in {
4502
4505
def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
@@ -7064,19 +7067,19 @@ def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
7064
7067
// Patterns for FP16 intrinsics. These require a register copy to/from the
// operand, as i16 values are not supported.
// The six patterns below select SCVTFh for int_aarch64_neon_vcvtfxs2fp and
// UCVTFh for int_aarch64_neon_vcvtfxu2fp, each producing an f16 result. The
// source is an FPR32 or FPR64 register (also matched when wrapped in
// sext_inreg from i16 for the signed form, or masked with 65535 for the
// unsigned form); the instruction operand is its hsub subregister, and
// vecshiftR16:$imm is passed through. On the '+' lines the subregister extract
// is given an explicit f16 type; the '-' lines are the earlier untyped form.
7065
7068
7066
7069
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
7067
- (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7070
+ (SCVTFh (f16 ( EXTRACT_SUBREG FPR32:$Rn, hsub) ), vecshiftR16:$imm)>;
7068
7071
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
7069
- (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7072
+ (SCVTFh (f16 ( EXTRACT_SUBREG FPR32:$Rn, hsub) ), vecshiftR16:$imm)>;
7070
7073
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7071
- (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
7074
+ (SCVTFh (f16 ( EXTRACT_SUBREG FPR64:$Rn, hsub) ), vecshiftR16:$imm)>;
7072
7075
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
7073
7076
(and FPR32:$Rn, (i32 65535)),
7074
7077
vecshiftR16:$imm)),
7075
- (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7078
+ (UCVTFh (f16 ( EXTRACT_SUBREG FPR32:$Rn, hsub) ), vecshiftR16:$imm)>;
7076
7079
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
7077
- (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7080
+ (UCVTFh (f16 ( EXTRACT_SUBREG FPR32:$Rn, hsub) ), vecshiftR16:$imm)>;
7078
7081
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7079
- (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
7082
+ (UCVTFh (f16 ( EXTRACT_SUBREG FPR64:$Rn, hsub) ), vecshiftR16:$imm)>;
7080
7083
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
7081
7084
(i32 (INSERT_SUBREG
7082
7085
(i32 (IMPLICIT_DEF)),
0 commit comments