Skip to content

Commit 064cea9

Browse files
committed
AMDGPU/GlobalISel: Try to use s_and_b64 in ptrmask selection
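For a 64-bit pointer in SGPRs, select a single S_AND_B64 when neither 32-bit half of the mask is known to be all ones. This avoids a test diff with SelectionDAG (SDAG).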
1 parent 2d1f9aa commit 064cea9

File tree

2 files changed

+32
-50
lines changed

2 files changed

+32
-50
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2552,6 +2552,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
25522552
Register MaskReg = I.getOperand(2).getReg();
25532553
LLT Ty = MRI->getType(DstReg);
25542554
LLT MaskTy = MRI->getType(MaskReg);
2555+
MachineBasicBlock *BB = I.getParent();
2556+
const DebugLoc &DL = I.getDebugLoc();
25552557

25562558
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
25572559
const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -2560,6 +2562,24 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
25602562
if (DstRB != SrcRB) // Should only happen for hand written MIR.
25612563
return false;
25622564

2565+
// Try to avoid emitting a bit operation when we only need to touch half of
2566+
// the 64-bit pointer.
2567+
APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
2568+
const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
2569+
const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
2570+
2571+
const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
2572+
const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
2573+
2574+
if (!IsVGPR && Ty.getSizeInBits() == 64 &&
2575+
!CanCopyLow32 && !CanCopyHi32) {
2576+
auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
2577+
.addReg(SrcReg)
2578+
.addReg(MaskReg);
2579+
I.eraseFromParent();
2580+
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2581+
}
2582+
25632583
unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
25642584
const TargetRegisterClass &RegRC
25652585
= IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
@@ -2576,8 +2596,6 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
25762596
!RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
25772597
return false;
25782598

2579-
MachineBasicBlock *BB = I.getParent();
2580-
const DebugLoc &DL = I.getDebugLoc();
25812599
if (Ty.getSizeInBits() == 32) {
25822600
assert(MaskTy.getSizeInBits() == 32 &&
25832601
"ptrmask should have been narrowed during legalize");
@@ -2600,13 +2618,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
26002618

26012619
Register MaskedLo, MaskedHi;
26022620

2603-
// Try to avoid emitting a bit operation when we only need to touch half of
2604-
// the 64-bit pointer.
2605-
APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
2606-
2607-
const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
2608-
const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
2609-
if ((MaskOnes & MaskLo32) == MaskLo32) {
2621+
if (CanCopyLow32) {
26102622
// If all the bits in the low half are 1, we only need a copy for it.
26112623
MaskedLo = LoReg;
26122624
} else {
@@ -2621,7 +2633,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
26212633
.addReg(MaskLo);
26222634
}
26232635

2624-
if ((MaskOnes & MaskHi32) == MaskHi32) {
2636+
if (CanCopyHi32) {
26252637
// If all the bits in the high half are 1, we only need a copy for it.
26262638
MaskedHi = HiReg;
26272639
} else {

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir

Lines changed: 10 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,8 @@ body: |
244244
; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr
245245
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
246246
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
247-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
248-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
249-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
250-
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
251-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
252-
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc
253-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
254-
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
247+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc
248+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
255249
%0:sgpr(p0) = COPY $sgpr0_sgpr1
256250
%1:sgpr(s64) = COPY $sgpr2_sgpr3
257251
%2:sgpr(p0) = G_PTRMASK %0, %1
@@ -293,14 +287,8 @@ body: |
293287
; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000
294288
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
295289
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
296-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
297-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
298-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
299-
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
300-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
301-
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
302-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
303-
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
290+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc
291+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
304292
%0:sgpr(p0) = COPY $sgpr0_sgpr1
305293
%1:sgpr(s64) = G_CONSTANT i64 0
306294
%2:sgpr(p0) = G_PTRMASK %0, %1
@@ -322,14 +310,8 @@ body: |
322310
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
323311
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
324312
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
325-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
326-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
327-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
328-
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
329-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
330-
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
331-
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
332-
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
313+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc
314+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
333315
%0:sgpr(p0) = COPY $sgpr0_sgpr1
334316
%1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
335317
%2:sgpr(p0) = G_PTRMASK %0, %1
@@ -351,14 +333,8 @@ body: |
351333
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
352334
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
353335
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
354-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
355-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
356-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
357-
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
358-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
359-
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
360-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
361-
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
336+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc
337+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
362338
%0:sgpr(p0) = COPY $sgpr0_sgpr1
363339
%const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808
364340
%1:sgpr(p0) = G_PTRMASK %0, %const
@@ -407,14 +383,8 @@ body: |
407383
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
408384
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
409385
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
410-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
411-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
412-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
413-
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
414-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
415-
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
416-
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
417-
; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
386+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc
387+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
418388
%0:sgpr(p0) = COPY $sgpr0_sgpr1
419389
%const:sgpr(s64) = G_CONSTANT i64 4294967296
420390
%1:sgpr(p0) = G_PTRMASK %0, %const

0 commit comments

Comments
 (0)