Skip to content

Commit 6718fda

Browse files
committed
[CodeGen] Fix issues with subvector intrinsic index types
This patch addresses issues arising from the fact that the index type used for subvector insertion/extraction is inconsistent between the intrinsics and SDNodes. The intrinsic forms require i64 whereas the SDNodes use the type returned by SelectionDAG::getVectorIdxTy. Rather than update the intrinsic definitions to use an overloaded index type, this patch fixes the issue by transforming the index to the correct type as required. Any loss of index bits going from i64 to a smaller type is unexpected, and will be caught by an assertion in SelectionDAG::getVectorIdxConstant. The patch also updates the documentation for INSERT_SUBVECTOR and adds an assertion to its creation to bring it in line with EXTRACT_SUBVECTOR. This necessitated changes to AArch64 which was using i64 for EXTRACT_SUBVECTOR but i32 for INSERT_SUBVECTOR. Only one test changed its codegen after updating the backend accordingly. Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D97459
1 parent 65fb706 commit 6718fda

File tree

8 files changed

+40
-21
lines changed

8 files changed

+40
-21
lines changed

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,9 @@ enum NodeType {
520520
/// The elements of VECTOR1 starting at IDX are overwritten with VECTOR2.
521521
/// Elements IDX through (IDX + num_elements(T) - 1) must be valid VECTOR1
522522
/// indices. If this condition cannot be determined statically but is false at
523-
/// runtime, then the result vector is undefined.
523+
/// runtime, then the result vector is undefined. The IDX parameter must be a
524+
/// vector index constant type, which for most targets will be an integer
525+
/// pointer type.
524526
///
525527
/// This operation supports inserting a fixed-width vector into a scalable
526528
/// vector, but not the other way around.

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5608,9 +5608,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
56085608
N1VT.getVectorMinNumElements()) &&
56095609
"Extract subvector overflow!");
56105610
assert(N2C->getAPIntValue().getBitWidth() ==
5611-
TLI->getVectorIdxTy(getDataLayout())
5612-
.getSizeInBits()
5613-
.getFixedSize() &&
5611+
TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
56145612
"Constant index for EXTRACT_SUBVECTOR has an invalid size");
56155613

56165614
// Trivial extraction.
@@ -5830,6 +5828,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
58305828
cast<ConstantSDNode>(N3)->getZExtValue()) <=
58315829
VT.getVectorMinNumElements()) &&
58325830
"Insert subvector overflow!");
5831+
assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() ==
5832+
TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
5833+
"Constant index for INSERT_SUBVECTOR has an invalid size");
58335834

58345835
// Trivial insertion.
58355836
if (VT == N2VT)

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7009,6 +7009,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
70097009
SDValue Vec = getValue(I.getOperand(0));
70107010
SDValue SubVec = getValue(I.getOperand(1));
70117011
SDValue Index = getValue(I.getOperand(2));
7012+
7013+
// The intrinsic's index type is i64, but the SDNode requires an index type
7014+
// suitable for the target. Convert the index as required.
7015+
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7016+
if (Index.getValueType() != VectorIdxTy)
7017+
Index = DAG.getVectorIdxConstant(
7018+
cast<ConstantSDNode>(Index)->getZExtValue(), DL);
7019+
70127020
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
70137021
setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
70147022
Index));
@@ -7021,6 +7029,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
70217029
SDValue Index = getValue(I.getOperand(1));
70227030
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
70237031

7032+
// The intrinsic's index type is i64, but the SDNode requires an index type
7033+
// suitable for the target. Convert the index as required.
7034+
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7035+
if (Index.getValueType() != VectorIdxTy)
7036+
Index = DAG.getVectorIdxConstant(
7037+
cast<ConstantSDNode>(Index)->getZExtValue(), DL);
7038+
70247039
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
70257040
return;
70267041
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8023,7 +8023,7 @@ static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
80238023
SDLoc DL(V64Reg);
80248024

80258025
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
8026-
V64Reg, DAG.getConstant(0, DL, MVT::i32));
8026+
V64Reg, DAG.getConstant(0, DL, MVT::i64));
80278027
}
80288028

80298029
/// getExtFactor - Determine the adjustment factor for the position when

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10517,7 +10517,7 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
1051710517
(v2i32 (AArch64duplane32
1051810518
(v4i32 V128:$Rm),
1051910519
VectorIndexS:$idx)))),
10520-
(i32 0))),
10520+
(i64 0))),
1052110521
(i64 0))))),
1052210522
(EXTRACT_SUBREG
1052310523
(v2i32 (!cast<Instruction>(NAME # v2i32_indexed)

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5607,7 +5607,7 @@ def : Pat<(v4i32 (opNode V128:$Rn)),
56075607

56085608
// If none did, fallback to the explicit patterns, consuming the vector_extract.
56095609
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
5610-
(i32 0)), (i64 0))),
5610+
(i64 0)), (i64 0))),
56115611
(EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
56125612
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
56135613
bsub), ssub)>;
@@ -5616,7 +5616,7 @@ def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
56165616
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
56175617
bsub), ssub)>;
56185618
def : Pat<(i32 (vector_extract (insert_subvector undef,
5619-
(v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
5619+
(v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
56205620
(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
56215621
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
56225622
hsub), ssub)>;
@@ -5637,7 +5637,7 @@ multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
56375637
// If there is a sign extension after this intrinsic, consume it as smov already
56385638
// performed it
56395639
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
5640-
(opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
5640+
(opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
56415641
(i32 (SMOVvi8to32
56425642
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
56435643
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
@@ -5649,7 +5649,7 @@ def : Pat<(i32 (sext_inreg (i32 (vector_extract
56495649
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
56505650
(i64 0)))>;
56515651
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
5652-
(opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
5652+
(opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
56535653
(i32 (SMOVvi16to32
56545654
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
56555655
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
@@ -5668,7 +5668,7 @@ multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
56685668
// If there is a masking operation keeping only what has been actually
56695669
// generated, consume it.
56705670
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
5671-
(opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
5671+
(opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
56725672
(i32 (EXTRACT_SUBREG
56735673
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
56745674
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
@@ -5680,7 +5680,7 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
56805680
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
56815681
ssub))>;
56825682
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
5683-
(opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
5683+
(opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
56845684
(i32 (EXTRACT_SUBREG
56855685
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
56865686
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
@@ -6003,7 +6003,7 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
60036003
(v2f32 (AArch64duplane32
60046004
(v4f32 (insert_subvector undef,
60056005
(v2f32 (fneg V64:$Rm)),
6006-
(i32 0))),
6006+
(i64 0))),
60076007
VectorIndexS:$idx)))),
60086008
(FMLSv2i32_indexed V64:$Rd, V64:$Rn,
60096009
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
@@ -6024,7 +6024,7 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
60246024
(v4f32 (AArch64duplane32
60256025
(v4f32 (insert_subvector undef,
60266026
(v2f32 (fneg V64:$Rm)),
6027-
(i32 0))),
6027+
(i64 0))),
60286028
VectorIndexS:$idx)))),
60296029
(FMLSv4i32_indexed V128:$Rd, V128:$Rn,
60306030
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
@@ -6055,7 +6055,7 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
60556055
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
60566056
(vector_extract (v4f32 (insert_subvector undef,
60576057
(v2f32 (fneg V64:$Rm)),
6058-
(i32 0))),
6058+
(i64 0))),
60596059
VectorIndexS:$idx))),
60606060
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
60616061
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,12 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
9797
; CHECK-LABEL: test_v9i8:
9898
; CHECK: // %bb.0:
9999
; CHECK-NEXT: mov w8, #-1
100-
; CHECK-NEXT: mov v0.b[9], w8
101-
; CHECK-NEXT: mov v0.b[10], w8
102-
; CHECK-NEXT: mov v0.b[11], w8
103-
; CHECK-NEXT: mov v0.b[12], w8
104-
; CHECK-NEXT: mov v0.b[13], w8
105-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
100+
; CHECK-NEXT: mov v1.16b, v0.16b
101+
; CHECK-NEXT: mov v1.b[9], w8
102+
; CHECK-NEXT: mov v1.b[10], w8
103+
; CHECK-NEXT: mov v1.b[11], w8
104+
; CHECK-NEXT: mov v1.b[13], w8
105+
; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
106106
; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
107107
; CHECK-NEXT: umov w8, v1.b[1]
108108
; CHECK-NEXT: umov w9, v1.b[0]

llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple riscv32 -mattr=+m,+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
23
; RUN: llc -mtriple riscv64 -mattr=+m,+d,+experimental-zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
34

45
define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {

0 commit comments

Comments
 (0)