Skip to content

Commit 4994aa7

Browse files
SpencerAbson authored and smallp-o-p committed
[AArch64][SVE] Use INS when moving elements from bottom 128b of SVE type (llvm#114034)
Moving elements from a scalable vector to a fixed-length vector should use [INS (vector, element)](https://developer.arm.com/documentation/100069/0606/SIMD-Vector-Instructions/INS--vector--element-) when we know that the extracted element is in the bottom 128 bits of the scalable vector. This avoids inserting unnecessary UMOV/FMOV instructions.
1 parent 40fb0e0 commit 4994aa7

File tree

5 files changed

+568
-96
lines changed

5 files changed

+568
-96
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7227,8 +7227,23 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
72277227
V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
72287228
)>;
72297229

7230-
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
7231-
ValueType VTScal, Instruction INS> {
7230+
// Move elements between vectors
7231+
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE,
7232+
ValueType VTScal, Operand SVEIdxTy, Instruction INS> {
7233+
// Extracting from the lowest 128-bits of an SVE vector
7234+
def : Pat<(VT128 (vector_insert VT128:$Rn,
7235+
(VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
7236+
(i64 imm:$Immd))),
7237+
(INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn)>;
7238+
7239+
def : Pat<(VT64 (vector_insert VT64:$Rn,
7240+
(VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
7241+
(i64 imm:$Immd))),
7242+
(EXTRACT_SUBREG
7243+
(INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
7244+
(VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn),
7245+
dsub)>;
7246+
// Extracting from another NEON vector
72327247
def : Pat<(VT128 (vector_insert V128:$src,
72337248
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
72347249
(i64 imm:$Immd))),
@@ -7256,15 +7271,15 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
72567271
dsub)>;
72577272
}
72587273

7259-
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
7260-
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
7261-
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
7262-
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
7274+
defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane>;
7275+
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
7276+
defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
7277+
defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane>;
72637278

7264-
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
7265-
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
7266-
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
7267-
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
7279+
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
7280+
defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
7281+
defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
7282+
defm : Neon_INS_elt_pattern<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane>;
72687283

72697284
// Insert from bitcast
72707285
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3366,6 +3366,21 @@ let Predicates = [HasSVEorSME] in {
33663366
(UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
33673367
def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)),
33683368
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
3369+
3370+
// Move element from the bottom 128-bits of a scalable vector to a single-element vector.
3371+
// Alternative case where insertelement is just scalar_to_vector rather than vector_insert.
3372+
def : Pat<(v1f64 (scalar_to_vector
3373+
(f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))),
3374+
(EXTRACT_SUBREG
3375+
(INSvi64lane (IMPLICIT_DEF), (i64 0),
3376+
(EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index),
3377+
dsub)>;
3378+
def : Pat<(v1i64 (scalar_to_vector
3379+
(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))),
3380+
(EXTRACT_SUBREG
3381+
(INSvi64lane (IMPLICIT_DEF), (i64 0),
3382+
(EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index),
3383+
dsub)>;
33693384
} // End HasNEON
33703385

33713386
let Predicates = [HasNEON] in {

0 commit comments

Comments
 (0)