Skip to content

[GlobalIsel] Combine extract vector element #90339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,11 @@ class CombinerHelper {
bool matchExtractVectorElementWithBuildVectorTrunc(const MachineOperand &MO,
BuildFnTy &MatchInfo);

/// Combine extract vector element with a shuffle vector on the vector
/// register.
///
/// \p MO is the operand holding the register defined by the
/// G_EXTRACT_VECTOR_ELT. The combine only applies when the extract index is
/// a known constant: the index selects an entry of the shuffle mask, and the
/// extract is then rewritten either to an undef value (undefined mask entry)
/// or to an extract taken directly from one of the two shuffle sources.
///
/// \returns true and fills \p MatchInfo with the function that builds the
/// replacement when the combine applies; false otherwise.
bool matchExtractVectorElementWithShuffleVector(const MachineOperand &MO,
BuildFnTy &MatchInfo);

/// Combine extract vector element with an insert vector element on the vector
/// register and different indices.
bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO,
Expand Down
12 changes: 12 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,18 @@ class GBuildVectorTrunc : public GMergeLikeInstr {
}
};

/// Typed view over a G_SHUFFLE_VECTOR machine instruction.
///
/// The accessors read the instruction's operands by position: operands 1 and
/// 2 are the two source registers and operand 3 carries the shuffle mask.
class GShuffleVector : public GenericMachineInstr {
public:
  /// Register of the first shuffle source (operand 1).
  Register getSrc1Reg() const {
    const auto &FirstSource = getOperand(1);
    return FirstSource.getReg();
  }

  /// Register of the second shuffle source (operand 2).
  Register getSrc2Reg() const {
    const auto &SecondSource = getOperand(2);
    return SecondSource.getReg();
  }

  /// Shuffle mask stored in operand 3.
  ArrayRef<int> getMask() const {
    const auto &MaskOperand = getOperand(3);
    return MaskOperand.getShuffleMask();
  }

  /// isa<>/cast<>/dyn_cast<> support: accepts exactly G_SHUFFLE_VECTOR.
  static bool classof(const MachineInstr *MI) {
    const auto Opcode = MI->getOpcode();
    return Opcode == TargetOpcode::G_SHUFFLE_VECTOR;
  }
};

/// Represents a G_PTR_ADD.
class GPtrAdd : public GenericMachineInstr {
public:
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,13 @@ def extract_vector_element_freeze : GICombineRule<
[{ return Helper.matchExtractVectorElementWithFreeze(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;

// Combines a G_EXTRACT_VECTOR_ELT whose vector operand ($src) is defined by a
// G_SHUFFLE_VECTOR. The pattern only establishes the def-use shape; the C++
// matcher decides whether the combine applies and stores the replacement
// builder in $matchinfo, which the apply step then runs on the root operand.
def extract_vector_element_shuffle_vector : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask),
(G_EXTRACT_VECTOR_ELT $root, $src, $idx),
[{ return Helper.matchExtractVectorElementWithShuffleVector(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;

// Combines concat operations
def concat_matchinfo : GIDefMatchData<"SmallVector<Register>">;
def combine_concat_vector : GICombineRule<
Expand Down Expand Up @@ -1582,6 +1589,7 @@ extract_vector_element_build_vector_trunc6,
extract_vector_element_build_vector_trunc7,
extract_vector_element_build_vector_trunc8,
extract_vector_element_freeze,
extract_vector_element_shuffle_vector,
insert_vector_element_extract_vector_element
]>;

Expand Down
106 changes: 106 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,112 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc(
return true;
}

bool CombinerHelper::matchExtractVectorElementWithShuffleVector(
const MachineOperand &MO, BuildFnTy &MatchInfo) {
GExtractVectorElement *Extract =
cast<GExtractVectorElement>(getDefIgnoringCopies(MO.getReg(), MRI));

//
// %zero:_(s64) = G_CONSTANT i64 0
// %sv:_(<4 x s32>) = G_SHUFFLE_SHUFFLE %arg1(<4 x s32>), %arg2(<4 x s32>),
// shufflemask(0, 0, 0, 0)
// %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %zero(s64)
//
// -->
//
// %zero1:_(s64) = G_CONSTANT i64 0
// %extract:_(s32) = G_EXTRACT_VECTOR_ELT %arg1(<4 x s32>), %zero1(s64)
//
//
//
//
// %three:_(s64) = G_CONSTANT i64 3
// %sv:_(<4 x s32>) = G_SHUFFLE_SHUFFLE %arg1(<4 x s32>), %arg2(<4 x s32>),
// shufflemask(0, 0, 0, -1)
// %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %three(s64)
//
// -->
//
// %extract:_(s32) = G_IMPLICIT_DEF
//
//
//
//
//
// %sv:_(<4 x s32>) = G_SHUFFLE_SHUFFLE %arg1(<4 x s32>), %arg2(<4 x s32>),
// shufflemask(0, 0, 0, -1)
// %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %opaque(s64)
//
// -->
//
// %sv:_(<4 x s32>) = G_SHUFFLE_SHUFFLE %arg1(<4 x s32>), %arg2(<4 x s32>),
// shufflemask(0, 0, 0, -1)
// %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %opaque(s64)
//

// We try to get the value of the Index register.
std::optional<ValueAndVReg> MaybeIndex =
getIConstantVRegValWithLookThrough(Extract->getIndexReg(), MRI);
if (!MaybeIndex)
return false;

GShuffleVector *Shuffle =
cast<GShuffleVector>(getDefIgnoringCopies(Extract->getVectorReg(), MRI));
Comment on lines +377 to +378
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's unfortunate you need to re-find the def instruction when it was already matched

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but the cast and get def is safer than I KNOW WHAT I AM DOING.


ArrayRef<int> Mask = Shuffle->getMask();

unsigned Offset = MaybeIndex->Value.getZExtValue();
int SrcIdx = Mask[Offset];

LLT Src1Type = MRI.getType(Shuffle->getSrc1Reg());
// At the IR level a <1 x ty> shuffle vector is valid, but we want to extract
// from a vector.
assert(Src1Type.isVector() && "expected to extract from a vector");
unsigned LHSWidth = Src1Type.isVector() ? Src1Type.getNumElements() : 1;

// Note that there is no one use check.
Register Dst = Extract->getReg(0);
LLT DstTy = MRI.getType(Dst);

if (SrcIdx < 0 &&
isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) {
MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); };
return true;
}

// If the legality check failed, then we still have to abort.
if (SrcIdx < 0)
return false;

Register NewVector;

// We check in which vector and at what offset to look through.
if (SrcIdx < (int)LHSWidth) {
NewVector = Shuffle->getSrc1Reg();
// SrcIdx unchanged
} else { // SrcIdx >= LHSWidth
NewVector = Shuffle->getSrc2Reg();
SrcIdx -= LHSWidth;
}

LLT IdxTy = MRI.getType(Extract->getIndexReg());
LLT NewVectorTy = MRI.getType(NewVector);

// We check the legality of the look through.
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_EXTRACT_VECTOR_ELT, {DstTy, NewVectorTy, IdxTy}}) ||
!isConstantLegalOrBeforeLegalizer({IdxTy}))
return false;

// We look through the shuffle vector.
MatchInfo = [=](MachineIRBuilder &B) {
auto Idx = B.buildConstant(IdxTy, SrcIdx);
B.buildExtractVectorElement(Dst, NewVector, Idx);
};

return true;
}

bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI,
BuildFnTy &MatchInfo) {
GInsertVectorElement *Insert = cast<GInsertVectorElement>(&MI);
Expand Down
63 changes: 63 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
Original file line number Diff line number Diff line change
Expand Up @@ -571,3 +571,66 @@ body: |
RET_ReallyLR implicit $x0
...
---
# Constant index 0 selects shuffle mask element 0, which is -1 (undefined):
# the CHECK lines expect the extract to become a G_IMPLICIT_DEF.
name: extract_from_build_vector_shuffle_vector_undef
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: extract_from_build_vector_shuffle_vector_undef
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %extract:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: $w0 = COPY %extract(s32)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(<4 x s32>) = COPY $q0
%arg2:_(<4 x s32>) = COPY $q1
%idx:_(s64) = G_CONSTANT i64 0
%sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), shufflemask(-1, 0, 0, 0)
%extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
$w0 = COPY %extract(s32)
RET_ReallyLR implicit $x0
...
---
# The index is a COPY of $x1, i.e. not a constant: the CHECK lines expect the
# shuffle and the extract to be left unchanged.
name: extract_from_build_vector_shuffle_vector_opaque
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: extract_from_build_vector_shuffle_vector_opaque
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: %arg2:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %idx:_(s64) = COPY $x1
; CHECK-NEXT: %sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2, shufflemask(undef, 0, 0, 0)
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(<4 x s32>) = COPY $q0
%arg2:_(<4 x s32>) = COPY $q1
%idx:_(s64) = COPY $x1
%sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), shufflemask(-1, 0, 0, 0)
%extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
$w0 = COPY %extract(s32)
RET_ReallyLR implicit $x0
...
---
# Constant index 0 selects shuffle mask element 0, which is 3 (an element of
# the first source): the CHECK lines expect an extract of element 3 taken
# directly from %arg1, with the shuffle gone.
name: extract_from_build_vector_shuffle_vector_const
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: extract_from_build_vector_shuffle_vector_const
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %arg1(<4 x s32>), [[C]](s64)
; CHECK-NEXT: $w0 = COPY %extract(s32)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(<4 x s32>) = COPY $q0
%arg2:_(<4 x s32>) = COPY $q1
%idx:_(s64) = G_CONSTANT i64 0
%sv:_(<4 x s32>) = G_SHUFFLE_VECTOR %arg1(<4 x s32>), %arg2(<4 x s32>), shufflemask(3, 0, 0, 0)
%extract:_(s32) = G_EXTRACT_VECTOR_ELT %sv(<4 x s32>), %idx(s64)
$w0 = COPY %extract(s32)
RET_ReallyLR implicit $x0
...
---
19 changes: 4 additions & 15 deletions llvm/test/CodeGen/AArch64/extract-vector-elt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -938,21 +938,10 @@ entry:
}

define i32 @extract_v4i32_shuffle_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_shuffle_const:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmov w0, s1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: extract_v4i32_shuffle_const:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: mov s0, v0.s[2]
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: extract_v4i32_shuffle_const:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: ret
entry:
%vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
%d = extractelement <4 x i32> %vector, i32 2
Expand Down