Skip to content

Commit e69916c

Browse files
committed
[AArch64][GlobalISel] Legalize integer across-lane intrinsics with actual type
Across-lane intrinsics with integer destination type (uaddv, saddv, umaxv, smaxv, uminv, sminv) were legalized with the destination type given in the LLVM IR intrinsic’s definition. It was wider than the actual destination type of the corresponding machine instruction. InstructionSelect was implicitly supposed to generate underlying extension instructions for these intrinsics, while the real destination type was opaque to other GlobalISel passes. Thus, llvm/test/CodeGen/AArch64/arm64-vaddv.ll failed on GlobalISel since the generated code was worse in functions that used the value of an across-lane intrinsic in following FP&SIMD instructions (functions with _used_by_laneop suffix). Here intrinsics are legalized and selected with an actual destination type, making it transparent to other passes. If the destination value is used in further instructions accepting FPR registers, there won’t be extra copies across register banks. The i16 type is added to the list of the types of the FPR16 register bank to make it possible, and a few SelectionDAG patterns are modified to eliminate ambiguity in TableGen. Differential Revision: https://reviews.llvm.org/D156831
1 parent 2c629eb commit e69916c

File tree

11 files changed

+167
-117
lines changed

11 files changed

+167
-117
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8601,7 +8601,7 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
86018601
(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
86028602
(f16 (vector_extract (v8f16 V128:$Rm), VectorIndexH:$idx)))),
86038603
(!cast<Instruction>(NAME # v1i16_indexed)
8604-
(EXTRACT_SUBREG V128:$Rn, hsub), V128:$Rm, VectorIndexH:$idx)>;
8604+
(f16 (EXTRACT_SUBREG V128:$Rn, hsub)), V128:$Rm, VectorIndexH:$idx)>;
86058605
}
86068606

86078607
let Predicates = [HasNEON] in {
@@ -9157,7 +9157,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
91579157
(i64 0))))),
91589158
(!cast<Instruction>(NAME # v1i32_indexed)
91599159
FPR32Op:$Rd,
9160-
(EXTRACT_SUBREG V64:$Rn, hsub),
9160+
(f16 (EXTRACT_SUBREG V64:$Rn, hsub)),
91619161
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
91629162
(i64 0))>;
91639163

@@ -9170,7 +9170,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
91709170
(i64 0))))),
91719171
(!cast<Instruction>(NAME # v1i32_indexed)
91729172
FPR32Op:$Rd,
9173-
(EXTRACT_SUBREG V64:$Rn, hsub),
9173+
(f16 (EXTRACT_SUBREG V64:$Rn, hsub)),
91749174
V128_lo:$Rm,
91759175
VectorIndexH:$idx)>;
91769176

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -303,30 +303,43 @@ def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
303303
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
304304
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
305305

306+
let GIIgnoreCopies = 1 in
307+
class PatIgnoreCopies<dag pattern, dag result> : Pat<pattern, result>, GISelFlags;
308+
306309
multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
307-
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
310+
def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v8i8 V64:$Rn))))),
308311
(i32 (SMOVvi8to32
309312
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
310-
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
313+
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
311314
(i64 0)))>;
312-
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
315+
def : Pat<(i8 (intOp (v8i8 V64:$Rn))),
316+
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn)>;
317+
318+
def : PatIgnoreCopies<(i32 (sext (i8 (intOp (v16i8 V128:$Rn))))),
313319
(i32 (SMOVvi8to32
314320
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
315321
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
316322
(i64 0)))>;
323+
def : Pat<(i8 (intOp (v16i8 V128:$Rn))),
324+
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn)>;
317325

318-
def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
326+
def : PatIgnoreCopies<(i32 (sext (i16 (intOp (v4i16 V64:$Rn))))),
319327
(i32 (SMOVvi16to32
320328
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
321329
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
322330
(i64 0)))>;
323-
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
331+
def : Pat<(i16 (intOp (v4i16 V64:$Rn))),
332+
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn)>;
333+
334+
def : PatIgnoreCopies<(i32 (sext (i16 (intOp (v8i16 V128:$Rn))))),
324335
(i32 (SMOVvi16to32
325336
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
326337
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
327338
(i64 0)))>;
339+
def : Pat<(i16 (intOp (v8i16 V128:$Rn))),
340+
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn)>;
328341

329-
def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
342+
def : PatIgnoreCopies<(i32 (intOp (v4i32 V128:$Rn))),
330343
(i32 (EXTRACT_SUBREG
331344
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
332345
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
@@ -335,29 +348,48 @@ multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
335348

336349
multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc,
337350
Intrinsic intOp> {
338-
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
339-
(i32 (EXTRACT_SUBREG
340-
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
341-
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
342-
ssub))>;
343-
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
344-
(i32 (EXTRACT_SUBREG
345-
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
346-
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
347-
ssub))>;
351+
def : PatIgnoreCopies<(i32 (zext (i8 (intOp (v8i8 V64:$Rn))))),
352+
(COPY_TO_REGCLASS
353+
(i32 (EXTRACT_SUBREG
354+
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
355+
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
356+
ssub)),
357+
GPR32)>;
358+
def : Pat<(i8 (intOp (v8i8 V64:$Rn))),
359+
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn)>;
360+
361+
def : PatIgnoreCopies<(i32 (zext (i8 (intOp (v16i8 V128:$Rn))))),
362+
(COPY_TO_REGCLASS
363+
(i32 (EXTRACT_SUBREG
364+
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
365+
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
366+
ssub)),
367+
GPR32)>;
368+
def : Pat<(i8 (intOp (v16i8 V128:$Rn))),
369+
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn)>;
370+
348371

349-
def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
372+
def : PatIgnoreCopies<(i32 (zext (i16 (intOp (v4i16 V64:$Rn))))),
373+
(COPY_TO_REGCLASS
350374
(i32 (EXTRACT_SUBREG
351375
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
352376
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
353-
ssub))>;
354-
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
355-
(i32 (EXTRACT_SUBREG
356-
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
357-
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
358-
ssub))>;
377+
ssub)),
378+
GPR32)>;
379+
def : Pat<(i16 (intOp (v4i16 V64:$Rn))),
380+
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn)>;
359381

360-
def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
382+
def : PatIgnoreCopies<(i32 (zext (i16 (intOp (v8i16 V128:$Rn))))),
383+
(COPY_TO_REGCLASS
384+
(i32 (EXTRACT_SUBREG
385+
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
386+
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
387+
ssub)),
388+
GPR32)>;
389+
def : Pat<(i16 (intOp (v8i16 V128:$Rn))),
390+
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn)>;
391+
392+
def : PatIgnoreCopies<(i32 (intOp (v4i32 V128:$Rn))),
361393
(i32 (EXTRACT_SUBREG
362394
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
363395
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
@@ -373,12 +405,23 @@ def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
373405
(ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
374406
ssub))>;
375407

408+
def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
409+
(i64 (EXTRACT_SUBREG
410+
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
411+
(ADDPv2i64p V128:$Rn), dsub),
412+
dsub))>;
413+
376414
defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;
377415
def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
378416
(i32 (EXTRACT_SUBREG
379417
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
380418
(ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
381419
ssub))>;
420+
def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
421+
(i64 (EXTRACT_SUBREG
422+
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
423+
(ADDPv2i64p V128:$Rn), dsub),
424+
dsub))>;
382425

383426
defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
384427
def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3679,27 +3679,28 @@ let Predicates = [IsLE, UseSTRQro] in {
36793679
// Match stores from lane 0 to the appropriate subreg's store.
36803680
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
36813681
ValueType VecTy, ValueType STy,
3682+
ValueType SubRegTy,
36823683
SubRegIndex SubRegIdx,
36833684
Instruction STRW, Instruction STRX> {
36843685

36853686
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
36863687
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3687-
(STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3688+
(STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
36883689
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
36893690

36903691
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
36913692
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3692-
(STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3693+
(STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
36933694
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
36943695
}
36953696

36963697
let AddedComplexity = 19 in {
3697-
defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
3698-
defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>;
3699-
defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>;
3700-
defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>;
3701-
defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>;
3702-
defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>;
3698+
defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
3699+
defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, f16, hsub, STRHroW, STRHroX>;
3700+
defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, i32, ssub, STRSroW, STRSroX>;
3701+
defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, i32, ssub, STRSroW, STRSroX>;
3702+
defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, i64, dsub, STRDroW, STRDroX>;
3703+
defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, i64, dsub, STRDroW, STRDroX>;
37033704
}
37043705

37053706
//---
@@ -3818,21 +3819,22 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
38183819
// Match stores from lane 0 to the appropriate subreg's store.
38193820
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
38203821
ValueType VTy, ValueType STy,
3822+
ValueType SubRegTy,
38213823
SubRegIndex SubRegIdx, Operand IndexType,
38223824
Instruction STR> {
38233825
def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
38243826
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
3825-
(STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3827+
(STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
38263828
GPR64sp:$Rn, IndexType:$offset)>;
38273829
}
38283830

38293831
let AddedComplexity = 19 in {
3830-
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
3831-
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>;
3832-
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>;
3833-
defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>;
3834-
defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>;
3835-
defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>;
3832+
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
3833+
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
3834+
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
3835+
defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
3836+
defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
3837+
defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
38363838
}
38373839

38383840
//---
@@ -3961,17 +3963,18 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
39613963
// Match stores from lane 0 to the appropriate subreg's store.
39623964
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
39633965
ValueType VTy, ValueType STy,
3966+
ValueType SubRegTy,
39643967
SubRegIndex SubRegIdx, Instruction STR> {
3965-
defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
3968+
defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
39663969
}
39673970

39683971
let AddedComplexity = 19 in {
3969-
defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
3970-
defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>;
3971-
defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>;
3972-
defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>;
3973-
defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>;
3974-
defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>;
3972+
defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
3973+
defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
3974+
defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
3975+
defm : VecStoreULane0Pat<store, v4f32, f32, i32, ssub, STURSi>;
3976+
defm : VecStoreULane0Pat<store, v2i64, i64, i64, dsub, STURDi>;
3977+
defm : VecStoreULane0Pat<store, v2f64, f64, i64, dsub, STURDi>;
39753978
}
39763979

39773980
//---
@@ -4496,7 +4499,7 @@ multiclass FMULScalarFromIndexedLane0Patterns<string inst,
44964499
def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
44974500
(f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
44984501
(!cast<Instruction>(inst # inst_f16_suffix)
4499-
FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
4502+
FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
45004503
}
45014504
let Predicates = preds in {
45024505
def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
@@ -7064,19 +7067,19 @@ def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
70647067
// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
70657068

70667069
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
7067-
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7070+
(SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
70687071
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
7069-
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7072+
(SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
70707073
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7071-
(SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
7074+
(SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
70727075
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
70737076
(and FPR32:$Rn, (i32 65535)),
70747077
vecshiftR16:$imm)),
7075-
(UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7078+
(UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
70767079
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
7077-
(UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7080+
(UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
70787081
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7079-
(UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
7082+
(UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
70807083
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
70817084
(i32 (INSERT_SUBREG
70827085
(i32 (IMPLICIT_DEF)),

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
438438
def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> {
439439
let Size = 8;
440440
}
441-
def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
441+
def FPR16 : RegisterClass<"AArch64", [f16, bf16, i16], 16, (sequence "H%u", 0, 31)> {
442442
let Size = 16;
443443
}
444444

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1134,7 +1134,8 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
11341134

11351135
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
11361136
MachineInstr &MI) const {
1137-
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
1137+
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
1138+
switch (IntrinsicID) {
11381139
case Intrinsic::vacopy: {
11391140
unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
11401141
unsigned VaListSize =
@@ -1214,6 +1215,36 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
12141215
MI.eraseFromParent();
12151216
return true;
12161217
}
1218+
case Intrinsic::aarch64_neon_uaddv:
1219+
case Intrinsic::aarch64_neon_saddv:
1220+
case Intrinsic::aarch64_neon_umaxv:
1221+
case Intrinsic::aarch64_neon_smaxv:
1222+
case Intrinsic::aarch64_neon_uminv:
1223+
case Intrinsic::aarch64_neon_sminv: {
1224+
MachineIRBuilder MIB(MI);
1225+
MachineRegisterInfo &MRI = *MIB.getMRI();
1226+
bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
1227+
IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
1228+
IntrinsicID == Intrinsic::aarch64_neon_sminv;
1229+
1230+
auto OldDst = MI.getOperand(0).getReg();
1231+
auto OldDstTy = MRI.getType(OldDst);
1232+
LLT NewDstTy = MRI.getType(MI.getOperand(2).getReg()).getElementType();
1233+
if (OldDstTy == NewDstTy)
1234+
return true;
1235+
1236+
auto NewDst = MRI.createGenericVirtualRegister(NewDstTy);
1237+
1238+
Helper.Observer.changingInstr(MI);
1239+
MI.getOperand(0).setReg(NewDst);
1240+
Helper.Observer.changedInstr(MI);
1241+
1242+
MIB.setInsertPt(MIB.getMBB(), ++MIB.getInsertPt());
1243+
MIB.buildExtOrTrunc(IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT,
1244+
OldDst, NewDst);
1245+
1246+
return true;
1247+
}
12171248
}
12181249

12191250
return true;

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,12 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
493493
return false;
494494
case Intrinsic::aarch64_neon_uaddlv:
495495
case Intrinsic::aarch64_neon_uaddv:
496+
case Intrinsic::aarch64_neon_saddv:
496497
case Intrinsic::aarch64_neon_umaxv:
498+
case Intrinsic::aarch64_neon_smaxv:
497499
case Intrinsic::aarch64_neon_uminv:
500+
case Intrinsic::aarch64_neon_sminv:
501+
case Intrinsic::aarch64_neon_faddv:
498502
case Intrinsic::aarch64_neon_fmaxv:
499503
case Intrinsic::aarch64_neon_fminv:
500504
case Intrinsic::aarch64_neon_fmaxnmv:
@@ -505,13 +509,6 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
505509
return SrcTy.getElementType().getSizeInBits() >= 16 &&
506510
SrcTy.getElementCount().getFixedValue() >= 4;
507511
}
508-
case Intrinsic::aarch64_neon_saddv:
509-
case Intrinsic::aarch64_neon_smaxv:
510-
case Intrinsic::aarch64_neon_sminv: {
511-
const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
512-
return SrcTy.getElementType().getSizeInBits() >= 32 &&
513-
SrcTy.getElementCount().getFixedValue() >= 2;
514-
}
515512
}
516513
}
517514

llvm/test/CodeGen/AArch64/arm64-smaxv.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
2+
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
3+
; RUN: llc < %s -global-isel=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
24

35
define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
46
; CHECK: test_vmaxv_s8

0 commit comments

Comments
 (0)