Skip to content

Commit 2861ec8

Browse files
committed
[AArch64][GlobalISel] Add lowering for constant BIT/BIF/BSP (#65897)
The non-constant bit/bif/bsp cases already work through TableGen patterns; this patch handles the constant case, mirroring the basic support for `or(and(X, C), and(Y, ~C))` from ISel's tryCombineToBSL. BSP gets expanded to either BIT, BIF or BSL depending on the best register allocation. G_BIT is replaced with G_BSP as a more general alternative.
1 parent 9f7906a commit 2861ec8

File tree

7 files changed

+153
-130
lines changed

7 files changed

+153
-130
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,14 @@ def unmerge_ext_to_unmerge : GICombineRule<
215215
(apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
216216
>;
217217

218+
// Match data for or_to_bsp: a tuple of three registers that is filled in by
// matchOrToBSP and consumed by applyOrToBSP.
def regtriple_matchdata : GIDefMatchData<"std::tuple<Register, Register, Register>">;
219+
// Combine rule rooted at a G_OR. matchOrToBSP decides whether the G_OR can be
// rewritten and records the registers involved in $matchinfo; applyOrToBSP
// then performs the rewrite to G_BSP.
def or_to_bsp: GICombineRule <
220+
(defs root:$root, regtriple_matchdata:$matchinfo),
221+
(match (wip_match_opcode G_OR):$root,
222+
[{ return matchOrToBSP(*${root}, MRI, ${matchinfo}); }]),
223+
(apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
224+
>;
225+
218226
// Post-legalization combines which should happen at all optimization levels.
219227
// (E.g. ones that facilitate matching for the selector) For example, matching
220228
// pseudos.
@@ -242,5 +250,5 @@ def AArch64PostLegalizerCombiner
242250
constant_fold_binops, identity_combines,
243251
ptr_add_immed_chain, overlapping_and,
244252
split_store_zero_128, undef_combines,
245-
select_to_minmax]> {
253+
select_to_minmax, or_to_bsp]> {
246254
}

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,9 @@ def G_PREFETCH : AArch64GenericInstruction {
215215
let hasSideEffects = 1;
216216
}
217217

218-
// Generic bitwise insert if true.
219-
def G_BIT : AArch64GenericInstruction {
218+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
219+
// expands into BSL/BIT/BIF after register allocation.
220+
def G_BSP : AArch64GenericInstruction {
220221
let OutOperandList = (outs type0:$dst);
221222
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
222223
let hasSideEffects = 0;
@@ -252,7 +253,7 @@ def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
252253
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
253254
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
254255

255-
def : GINodeEquiv<G_BIT, AArch64bit>;
256+
def : GINodeEquiv<G_BSP, AArch64bsp>;
256257

257258
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
258259

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1790,7 +1790,7 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
17901790
if (DstSize == 64)
17911791
Mask = MIRBuilder.buildFNeg(VecTy, Mask);
17921792

1793-
auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
1793+
auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins2, Ins1});
17941794

17951795
// Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
17961796
// want this to eventually become an EXTRACT_SUBREG.

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,47 @@ void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
338338
Store.eraseFromParent();
339339
}
340340

341+
// Match or(and(X, C), and(Y, ~C)) on a vector G_OR, where C and ~C are
// G_BUILD_VECTORs of constants that are lane-wise bitwise complements.
//
// MI is the candidate G_OR and MRI is used to look up types and definitions.
// On success MatchInfo is set to {X, Y, C} (C being the register bound to the
// first G_AND's build-vector operand) and true is returned. Returns false,
// leaving MatchInfo untouched, when any part of the pattern does not match.
bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
342+
std::tuple<Register, Register, Register> &MatchInfo) {
343+
const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
344+
// Only vector results are handled.
if (!DstTy.isVector())
345+
return false;
346+
347+
// AO1/AO2 bind the value operands of the two G_ANDs; BVO1/BVO2 bind the
// operands that are expected to be the constant masks.
Register AO1, AO2, BVO1, BVO2;
348+
if (!mi_match(MI, MRI,
349+
m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
350+
m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
351+
return false;
352+
353+
// Both mask operands must be defined by a G_BUILD_VECTOR.
auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
354+
auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
355+
if (!BV1 || !BV2)
356+
return false;
357+
358+
// Every lane of both build vectors must resolve to an integer constant, and
// the lane in BV1 must equal the bitwise NOT of the matching lane in BV2.
// NOTE(review): the lane count is taken from DstTy; this assumes both build
// vectors have that many source operands - confirm.
for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
359+
auto ValAndVReg1 =
360+
getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
361+
auto ValAndVReg2 =
362+
getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
363+
if (!ValAndVReg1 || !ValAndVReg2 ||
364+
ValAndVReg1->Value != ~ValAndVReg2->Value)
365+
return false;
366+
}
367+
368+
// Record {X, Y, C}; only the first mask is kept because the second is its
// complement.
MatchInfo = {AO1, AO2, BVO1};
369+
return true;
370+
}
371+
372+
// Rewrite a G_OR accepted by matchOrToBSP into an AArch64 G_BSP.
//
// MI is the matched G_OR, B is the builder used to create the replacement, and
// MatchInfo holds the three registers recorded by the matcher. The new G_BSP
// defines MI's original destination register and takes its sources in the
// order (element 2, element 0, element 1) of MatchInfo, i.e. the mask register
// first followed by the two G_AND value operands. MI is erased afterwards.
void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
373+
MachineIRBuilder &B,
374+
std::tuple<Register, Register, Register> &MatchInfo) {
375+
// Point the builder at MI (setInstrAndDebugLoc) before building the new
// instruction.
B.setInstrAndDebugLoc(MI);
376+
B.buildInstr(
377+
AArch64::G_BSP, {MI.getOperand(0).getReg()},
378+
{std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
379+
// The G_OR's result is now produced by the G_BSP, so drop the original.
MI.eraseFromParent();
380+
}
381+
341382
class AArch64PostLegalizerCombinerImpl : public Combiner {
342383
protected:
343384
// TODO: Make CombinerHelper methods const.

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,19 @@ body: |
1010
liveins: $s0, $s1
1111
; CHECK-LABEL: name: legalize_s32
1212
; CHECK: liveins: $s0, $s1
13-
; CHECK: %val:_(s32) = COPY $s0
14-
; CHECK: %sign:_(s32) = COPY $s1
15-
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
16-
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
17-
; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
18-
; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
19-
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
20-
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
21-
; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
22-
; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
23-
; CHECK: $s0 = COPY %fcopysign(s32)
24-
; CHECK: RET_ReallyLR implicit $s0
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: %val:_(s32) = COPY $s0
15+
; CHECK-NEXT: %sign:_(s32) = COPY $s1
16+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
17+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
18+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
19+
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
20+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
21+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
22+
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC1]], [[IVEC]]
23+
; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
24+
; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
25+
; CHECK-NEXT: RET_ReallyLR implicit $s0
2526
%val:_(s32) = COPY $s0
2627
%sign:_(s32) = COPY $s1
2728
%fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
@@ -37,18 +38,19 @@ body: |
3738
liveins: $d0, $d1
3839
; CHECK-LABEL: name: legalize_s64
3940
; CHECK: liveins: $d0, $d1
40-
; CHECK: %val:_(s64) = COPY $d0
41-
; CHECK: %sign:_(s64) = COPY $d1
42-
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
43-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
44-
; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
45-
; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
46-
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
47-
; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
48-
; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
49-
; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
50-
; CHECK: $d0 = COPY %fcopysign(s64)
51-
; CHECK: RET_ReallyLR implicit $d0
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: %val:_(s64) = COPY $d0
43+
; CHECK-NEXT: %sign:_(s64) = COPY $d1
44+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
45+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
46+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
47+
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
48+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
49+
; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
50+
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC1]], [[IVEC]]
51+
; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
52+
; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
53+
; CHECK-NEXT: RET_ReallyLR implicit $d0
5254
%val:_(s64) = COPY $d0
5355
%sign:_(s64) = COPY $d1
5456
%fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)

llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir

Lines changed: 48 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,17 @@ body: |
1313
1414
; CHECK-LABEL: name: BITv8i8_v2s32
1515
; CHECK: liveins: $d0, $d1, $d2
16-
; CHECK: %lhs:fpr64 = COPY $d0
17-
; CHECK: %mhs:fpr64 = COPY $d1
18-
; CHECK: %rhs:fpr64 = COPY $d2
19-
; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
20-
; CHECK: $d0 = COPY %bit
21-
; CHECK: RET_ReallyLR implicit $d0
16+
; CHECK-NEXT: {{ $}}
17+
; CHECK-NEXT: %lhs:fpr64 = COPY $d0
18+
; CHECK-NEXT: %mhs:fpr64 = COPY $d1
19+
; CHECK-NEXT: %rhs:fpr64 = COPY $d2
20+
; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
21+
; CHECK-NEXT: $d0 = COPY %bit
22+
; CHECK-NEXT: RET_ReallyLR implicit $d0
2223
%lhs:fpr(<2 x s32>) = COPY $d0
2324
%mhs:fpr(<2 x s32>) = COPY $d1
2425
%rhs:fpr(<2 x s32>) = COPY $d2
25-
%bit:fpr(<2 x s32>) = G_BIT %lhs, %mhs, %rhs
26+
%bit:fpr(<2 x s32>) = G_BSP %lhs, %mhs, %rhs
2627
$d0 = COPY %bit(<2 x s32>)
2728
RET_ReallyLR implicit $d0
2829
@@ -37,16 +38,17 @@ body: |
3738
liveins: $d0, $d1, $d2
3839
; CHECK-LABEL: name: BITv8i8_v4s16
3940
; CHECK: liveins: $d0, $d1, $d2
40-
; CHECK: %lhs:fpr64 = COPY $d0
41-
; CHECK: %mhs:fpr64 = COPY $d1
42-
; CHECK: %rhs:fpr64 = COPY $d2
43-
; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
44-
; CHECK: $d0 = COPY %bit
45-
; CHECK: RET_ReallyLR implicit $d0
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: %lhs:fpr64 = COPY $d0
43+
; CHECK-NEXT: %mhs:fpr64 = COPY $d1
44+
; CHECK-NEXT: %rhs:fpr64 = COPY $d2
45+
; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
46+
; CHECK-NEXT: $d0 = COPY %bit
47+
; CHECK-NEXT: RET_ReallyLR implicit $d0
4648
%lhs:fpr(<4 x s16>) = COPY $d0
4749
%mhs:fpr(<4 x s16>) = COPY $d1
4850
%rhs:fpr(<4 x s16>) = COPY $d2
49-
%bit:fpr(<4 x s16>) = G_BIT %lhs, %mhs, %rhs
51+
%bit:fpr(<4 x s16>) = G_BSP %lhs, %mhs, %rhs
5052
$d0 = COPY %bit(<4 x s16>)
5153
RET_ReallyLR implicit $d0
5254
@@ -62,16 +64,17 @@ body: |
6264
6365
; CHECK-LABEL: name: BITv16i8_v2s64
6466
; CHECK: liveins: $q0, $q1, $q2
65-
; CHECK: %lhs:fpr128 = COPY $q0
66-
; CHECK: %mhs:fpr128 = COPY $q1
67-
; CHECK: %rhs:fpr128 = COPY $q2
68-
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
69-
; CHECK: $q0 = COPY %bit
70-
; CHECK: RET_ReallyLR implicit $q0
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
69+
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
70+
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
71+
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
72+
; CHECK-NEXT: $q0 = COPY %bit
73+
; CHECK-NEXT: RET_ReallyLR implicit $q0
7174
%lhs:fpr(<2 x s64>) = COPY $q0
7275
%mhs:fpr(<2 x s64>) = COPY $q1
7376
%rhs:fpr(<2 x s64>) = COPY $q2
74-
%bit:fpr(<2 x s64>) = G_BIT %lhs, %mhs, %rhs
77+
%bit:fpr(<2 x s64>) = G_BSP %lhs, %mhs, %rhs
7578
$q0 = COPY %bit(<2 x s64>)
7679
RET_ReallyLR implicit $q0
7780
@@ -87,16 +90,17 @@ body: |
8790
8891
; CHECK-LABEL: name: BITv16i8_v4s32
8992
; CHECK: liveins: $q0, $q1, $q2
90-
; CHECK: %lhs:fpr128 = COPY $q0
91-
; CHECK: %mhs:fpr128 = COPY $q1
92-
; CHECK: %rhs:fpr128 = COPY $q2
93-
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
94-
; CHECK: $q0 = COPY %bit
95-
; CHECK: RET_ReallyLR implicit $q0
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
95+
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
96+
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
97+
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
98+
; CHECK-NEXT: $q0 = COPY %bit
99+
; CHECK-NEXT: RET_ReallyLR implicit $q0
96100
%lhs:fpr(<4 x s32>) = COPY $q0
97101
%mhs:fpr(<4 x s32>) = COPY $q1
98102
%rhs:fpr(<4 x s32>) = COPY $q2
99-
%bit:fpr(<4 x s32>) = G_BIT %lhs, %mhs, %rhs
103+
%bit:fpr(<4 x s32>) = G_BSP %lhs, %mhs, %rhs
100104
$q0 = COPY %bit(<4 x s32>)
101105
RET_ReallyLR implicit $q0
102106
@@ -112,16 +116,17 @@ body: |
112116
113117
; CHECK-LABEL: name: BITv16i8_v8s16
114118
; CHECK: liveins: $q0, $q1, $q2
115-
; CHECK: %lhs:fpr128 = COPY $q0
116-
; CHECK: %mhs:fpr128 = COPY $q1
117-
; CHECK: %rhs:fpr128 = COPY $q2
118-
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
119-
; CHECK: $q0 = COPY %bit
120-
; CHECK: RET_ReallyLR implicit $q0
119+
; CHECK-NEXT: {{ $}}
120+
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
121+
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
122+
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
123+
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
124+
; CHECK-NEXT: $q0 = COPY %bit
125+
; CHECK-NEXT: RET_ReallyLR implicit $q0
121126
%lhs:fpr(<8 x s16>) = COPY $q0
122127
%mhs:fpr(<8 x s16>) = COPY $q1
123128
%rhs:fpr(<8 x s16>) = COPY $q2
124-
%bit:fpr(<8 x s16>) = G_BIT %lhs, %mhs, %rhs
129+
%bit:fpr(<8 x s16>) = G_BSP %lhs, %mhs, %rhs
125130
$q0 = COPY %bit(<8 x s16>)
126131
RET_ReallyLR implicit $q0
127132
@@ -137,15 +142,16 @@ body: |
137142
138143
; CHECK-LABEL: name: BITv16i8_v16s8
139144
; CHECK: liveins: $q0, $q1, $q2
140-
; CHECK: %lhs:fpr128 = COPY $q0
141-
; CHECK: %mhs:fpr128 = COPY $q1
142-
; CHECK: %rhs:fpr128 = COPY $q2
143-
; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
144-
; CHECK: $q0 = COPY %bit
145-
; CHECK: RET_ReallyLR implicit $q0
145+
; CHECK-NEXT: {{ $}}
146+
; CHECK-NEXT: %lhs:fpr128 = COPY $q0
147+
; CHECK-NEXT: %mhs:fpr128 = COPY $q1
148+
; CHECK-NEXT: %rhs:fpr128 = COPY $q2
149+
; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
150+
; CHECK-NEXT: $q0 = COPY %bit
151+
; CHECK-NEXT: RET_ReallyLR implicit $q0
146152
%lhs:fpr(<16 x s8>) = COPY $q0
147153
%mhs:fpr(<16 x s8>) = COPY $q1
148154
%rhs:fpr(<16 x s8>) = COPY $q2
149-
%bit:fpr(<16 x s8>) = G_BIT %lhs, %mhs, %rhs
155+
%bit:fpr(<16 x s8>) = G_BSP %lhs, %mhs, %rhs
150156
$q0 = COPY %bit(<16 x s8>)
151157
RET_ReallyLR implicit $q0

0 commit comments

Comments
 (0)