Skip to content

[AArch64][GlobalISel] Add lowering for constant BIT/BIF/BSP #65897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,14 @@ def unmerge_ext_to_unmerge : GICombineRule<
(apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
>;

// Match data for or_to_bsp: the three registers needed to build a G_BSP —
// the two value inputs and the constant select mask.
def regtriple_matchdata : GIDefMatchData<"std::tuple<Register, Register, Register>">;
// Combine (or (and x, C), (and y, ~C)) — where C and ~C are build-vectors of
// element-wise inverse constants — into a single G_BSP bit-select.
// matchOrToBSP/applyOrToBSP are defined in AArch64PostLegalizerCombiner.cpp.
def or_to_bsp: GICombineRule <
(defs root:$root, regtriple_matchdata:$matchinfo),
(match (wip_match_opcode G_OR):$root,
[{ return matchOrToBSP(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
>;

// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
Expand Down Expand Up @@ -242,5 +250,5 @@ def AArch64PostLegalizerCombiner
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax]> {
select_to_minmax, or_to_bsp]> {
}
7 changes: 4 additions & 3 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,9 @@ def G_PREFETCH : AArch64GenericInstruction {
let hasSideEffects = 1;
}

// Generic bitwise insert if true.
def G_BIT : AArch64GenericInstruction {
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this opcode needs a bit of documentation since it's not a real AArch64 instruction.

let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
let hasSideEffects = 0;
Expand Down Expand Up @@ -252,7 +253,7 @@ def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;

def : GINodeEquiv<G_BIT, AArch64bit>;
def : GINodeEquiv<G_BSP, AArch64bsp>;

def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1791,7 +1791,7 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
if (DstSize == 64)
Mask = MIRBuilder.buildFNeg(VecTy, Mask);

auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins1, Ins2});

// Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
// want this to eventually become an EXTRACT_SUBREG.
Expand Down
41 changes: 41 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,47 @@ void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
Store.eraseFromParent();
}

// Match a vector G_OR of two G_ANDs whose mask operands are build-vectors of
// constants that are element-wise bitwise inverses of each other:
//   or (and x, C), (and y, ~C)
// Every result bit comes from x where C is set and from y where C is clear,
// so the whole expression is a bit-select and can be emitted as one BSP
// pseudo (expanded to BSL/BIT/BIF after register allocation).
//
// On success MatchInfo receives {x, y, C} — the two value inputs followed by
// the mask that is AND'd with the FIRST input, matching BSP's semantics:
// BSP(mask, a, b) == (a & mask) | (b & ~mask).
bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::tuple<Register, Register, Register> &MatchInfo) {
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if (!DstTy.isVector())
    return false;

  Register AO1, AO2, BVO1, BVO2;
  if (!mi_match(MI, MRI,
                m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
                      m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
    return false;

  // Both mask operands must be build-vectors of known constants.
  auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
  auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
  if (!BV1 || !BV2)
    return false;

  // Each element of the first mask must be the bitwise NOT of the
  // corresponding element of the second. Note '~' binds tighter than '!=',
  // so this compares Value1 against ~Value2.
  for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
    auto ValAndVReg1 =
        getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
    auto ValAndVReg2 =
        getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
    if (!ValAndVReg1 || !ValAndVReg2 ||
        ValAndVReg1->Value != ~ValAndVReg2->Value)
      return false;
  }

  // The BSP mask must be BVO1, the constant AND'd with AO1: the apply step
  // emits BSP(mask, AO1, AO2) = (AO1 & mask) | (AO2 & ~mask), which equals
  // the original (AO1 & BVO1) | (AO2 & BVO2) only when mask == BVO1.
  // Passing BVO2 would swap the two value inputs and miscompile.
  MatchInfo = {AO1, AO2, BVO1};
  return true;
}

// Rewrite the matched G_OR as a single G_BSP bit-select. MatchInfo holds
// {first value, second value, mask}; G_BSP takes the mask as its first
// source operand.
void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B,
                  std::tuple<Register, Register, Register> &MatchInfo) {
  auto &[FirstVal, SecondVal, Mask] = MatchInfo;
  Register Dst = MI.getOperand(0).getReg();
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_BSP, {Dst}, {Mask, FirstVal, SecondVal});
  MI.eraseFromParent();
}

class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
Expand Down
50 changes: 26 additions & 24 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,19 @@ body: |
liveins: $s0, $s1
; CHECK-LABEL: name: legalize_s32
; CHECK: liveins: $s0, $s1
; CHECK: %val:_(s32) = COPY $s0
; CHECK: %sign:_(s32) = COPY $s1
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
; CHECK: $s0 = COPY %fcopysign(s32)
; CHECK: RET_ReallyLR implicit $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %val:_(s32) = COPY $s0
; CHECK-NEXT: %sign:_(s32) = COPY $s1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC]], [[IVEC1]]
; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%val:_(s32) = COPY $s0
%sign:_(s32) = COPY $s1
%fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
Expand All @@ -37,18 +38,19 @@ body: |
liveins: $d0, $d1
; CHECK-LABEL: name: legalize_s64
; CHECK: liveins: $d0, $d1
; CHECK: %val:_(s64) = COPY $d0
; CHECK: %sign:_(s64) = COPY $d1
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
; CHECK: $d0 = COPY %fcopysign(s64)
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %val:_(s64) = COPY $d0
; CHECK-NEXT: %sign:_(s64) = COPY $d1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC]], [[IVEC1]]
; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%val:_(s64) = COPY $d0
%sign:_(s64) = COPY $d1
%fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)
Expand Down
90 changes: 48 additions & 42 deletions llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ body: |

; CHECK-LABEL: name: BITv8i8_v2s32
; CHECK: liveins: $d0, $d1, $d2
; CHECK: %lhs:fpr64 = COPY $d0
; CHECK: %mhs:fpr64 = COPY $d1
; CHECK: %rhs:fpr64 = COPY $d2
; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
; CHECK: $d0 = COPY %bit
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:fpr64 = COPY $d0
; CHECK-NEXT: %mhs:fpr64 = COPY $d1
; CHECK-NEXT: %rhs:fpr64 = COPY $d2
; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
; CHECK-NEXT: $d0 = COPY %bit
; CHECK-NEXT: RET_ReallyLR implicit $d0
%lhs:fpr(<2 x s32>) = COPY $d0
%mhs:fpr(<2 x s32>) = COPY $d1
%rhs:fpr(<2 x s32>) = COPY $d2
%bit:fpr(<2 x s32>) = G_BIT %lhs, %mhs, %rhs
%bit:fpr(<2 x s32>) = G_BSP %lhs, %mhs, %rhs
$d0 = COPY %bit(<2 x s32>)
RET_ReallyLR implicit $d0

Expand All @@ -37,16 +38,17 @@ body: |
liveins: $d0, $d1, $d2
; CHECK-LABEL: name: BITv8i8_v4s16
; CHECK: liveins: $d0, $d1, $d2
; CHECK: %lhs:fpr64 = COPY $d0
; CHECK: %mhs:fpr64 = COPY $d1
; CHECK: %rhs:fpr64 = COPY $d2
; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
; CHECK: $d0 = COPY %bit
; CHECK: RET_ReallyLR implicit $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:fpr64 = COPY $d0
; CHECK-NEXT: %mhs:fpr64 = COPY $d1
; CHECK-NEXT: %rhs:fpr64 = COPY $d2
; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
; CHECK-NEXT: $d0 = COPY %bit
; CHECK-NEXT: RET_ReallyLR implicit $d0
%lhs:fpr(<4 x s16>) = COPY $d0
%mhs:fpr(<4 x s16>) = COPY $d1
%rhs:fpr(<4 x s16>) = COPY $d2
%bit:fpr(<4 x s16>) = G_BIT %lhs, %mhs, %rhs
%bit:fpr(<4 x s16>) = G_BSP %lhs, %mhs, %rhs
$d0 = COPY %bit(<4 x s16>)
RET_ReallyLR implicit $d0

Expand All @@ -62,16 +64,17 @@ body: |

; CHECK-LABEL: name: BITv16i8_v2s64
; CHECK: liveins: $q0, $q1, $q2
; CHECK: %lhs:fpr128 = COPY $q0
; CHECK: %mhs:fpr128 = COPY $q1
; CHECK: %rhs:fpr128 = COPY $q2
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
; CHECK: $q0 = COPY %bit
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
; CHECK-NEXT: $q0 = COPY %bit
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<2 x s64>) = COPY $q0
%mhs:fpr(<2 x s64>) = COPY $q1
%rhs:fpr(<2 x s64>) = COPY $q2
%bit:fpr(<2 x s64>) = G_BIT %lhs, %mhs, %rhs
%bit:fpr(<2 x s64>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<2 x s64>)
RET_ReallyLR implicit $q0

Expand All @@ -87,16 +90,17 @@ body: |

; CHECK-LABEL: name: BITv16i8_v4s32
; CHECK: liveins: $q0, $q1, $q2
; CHECK: %lhs:fpr128 = COPY $q0
; CHECK: %mhs:fpr128 = COPY $q1
; CHECK: %rhs:fpr128 = COPY $q2
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
; CHECK: $q0 = COPY %bit
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
; CHECK-NEXT: $q0 = COPY %bit
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<4 x s32>) = COPY $q0
%mhs:fpr(<4 x s32>) = COPY $q1
%rhs:fpr(<4 x s32>) = COPY $q2
%bit:fpr(<4 x s32>) = G_BIT %lhs, %mhs, %rhs
%bit:fpr(<4 x s32>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<4 x s32>)
RET_ReallyLR implicit $q0

Expand All @@ -112,16 +116,17 @@ body: |

; CHECK-LABEL: name: BITv16i8_v8s16
; CHECK: liveins: $q0, $q1, $q2
; CHECK: %lhs:fpr128 = COPY $q0
; CHECK: %mhs:fpr128 = COPY $q1
; CHECK: %rhs:fpr128 = COPY $q2
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
; CHECK: $q0 = COPY %bit
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
; CHECK-NEXT: $q0 = COPY %bit
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<8 x s16>) = COPY $q0
%mhs:fpr(<8 x s16>) = COPY $q1
%rhs:fpr(<8 x s16>) = COPY $q2
%bit:fpr(<8 x s16>) = G_BIT %lhs, %mhs, %rhs
%bit:fpr(<8 x s16>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<8 x s16>)
RET_ReallyLR implicit $q0

Expand All @@ -137,15 +142,16 @@ body: |

; CHECK-LABEL: name: BITv16i8_v16s8
; CHECK: liveins: $q0, $q1, $q2
; CHECK: %lhs:fpr128 = COPY $q0
; CHECK: %mhs:fpr128 = COPY $q1
; CHECK: %rhs:fpr128 = COPY $q2
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
; CHECK: $q0 = COPY %bit
; CHECK: RET_ReallyLR implicit $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
; CHECK-NEXT: $q0 = COPY %bit
; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<16 x s8>) = COPY $q0
%mhs:fpr(<16 x s8>) = COPY $q1
%rhs:fpr(<16 x s8>) = COPY $q2
%bit:fpr(<16 x s8>) = G_BIT %lhs, %mhs, %rhs
%bit:fpr(<16 x s8>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<16 x s8>)
RET_ReallyLR implicit $q0
Loading