Skip to content

Commit 4369eee

Browse files
committed
Revert "[AMDGPU][True16][CodeGen] support v_mov_b16 and v_swap_b16 in true16 format (llvm#102198)"
This reverts commit ae059a1.
1 parent 7752fec commit 4369eee

File tree

9 files changed

+73
-192
lines changed

9 files changed

+73
-192
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,15 +1460,7 @@ bool SIFoldOperands::tryFoldFoldableCopy(
14601460
return false;
14611461
}
14621462

1463-
MachineOperand *OpToFoldPtr;
1464-
if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
1465-
// Folding when any src_modifiers are non-zero is unsupported
1466-
if (TII->hasAnyModifiersSet(MI))
1467-
return false;
1468-
OpToFoldPtr = &MI.getOperand(2);
1469-
} else
1470-
OpToFoldPtr = &MI.getOperand(1);
1471-
MachineOperand &OpToFold = *OpToFoldPtr;
1463+
MachineOperand &OpToFold = MI.getOperand(1);
14721464
bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
14731465

14741466
// FIXME: We could also be folding things like TargetIndexes.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3369,8 +3369,6 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
33693369

33703370
bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
33713371
switch (MI.getOpcode()) {
3372-
case AMDGPU::V_MOV_B16_t16_e32:
3373-
case AMDGPU::V_MOV_B16_t16_e64:
33743372
case AMDGPU::V_MOV_B32_e32:
33753373
case AMDGPU::V_MOV_B32_e64:
33763374
case AMDGPU::V_MOV_B64_PSEUDO:
@@ -5641,9 +5639,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
56415639
unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
56425640
const TargetRegisterClass *RC = RI.getRegClass(RCID);
56435641
unsigned Size = RI.getRegSizeInBits(*RC);
5644-
unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
5645-
: Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
5646-
: AMDGPU::V_MOV_B32_e32;
5642+
unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
56475643
if (MO.isReg())
56485644
Opcode = AMDGPU::COPY;
56495645
else if (RI.isSGPRClass(RC))

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 20 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,6 @@ void SIShrinkInstructions::dropInstructionKeepingImpDefs(
657657
// although requirements match the pass placement and it reduces code size too.
658658
MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
659659
assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
660-
MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
661660
MovT.getOpcode() == AMDGPU::COPY);
662661

663662
Register T = MovT.getOperand(0).getReg();
@@ -669,12 +668,7 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
669668
Register X = Xop.getReg();
670669
unsigned Xsub = Xop.getSubReg();
671670

672-
unsigned Size = TII->getOpSize(MovT, 0);
673-
674-
// We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
675-
// are not allocatable.
676-
if (Size == 2 && X.isVirtual())
677-
return nullptr;
671+
unsigned Size = TII->getOpSize(MovT, 0) / 4;
678672

679673
if (!TRI->isVGPR(*MRI, X))
680674
return nullptr;
@@ -690,9 +684,9 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
690684
KilledT = MovY->killsRegister(T, TRI);
691685

692686
if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
693-
MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
694687
MovY->getOpcode() != AMDGPU::COPY) ||
695-
!MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
688+
!MovY->getOperand(1).isReg() ||
689+
MovY->getOperand(1).getReg() != T ||
696690
MovY->getOperand(1).getSubReg() != Tsub)
697691
continue;
698692

@@ -720,15 +714,14 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
720714
}
721715
if (MovX ||
722716
(I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
723-
I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
724717
I->getOpcode() != AMDGPU::COPY) ||
725718
I->getOperand(0).getReg() != X ||
726719
I->getOperand(0).getSubReg() != Xsub) {
727720
MovX = nullptr;
728721
break;
729722
}
730723

731-
if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
724+
if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
732725
continue;
733726

734727
MovX = &*I;
@@ -737,40 +730,23 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
737730
if (!MovX)
738731
continue;
739732

740-
LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
733+
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
741734

742-
MachineBasicBlock &MBB = *MovT.getParent();
743-
SmallVector<MachineInstr *, 4> Swaps;
744-
if (Size == 2) {
735+
for (unsigned I = 0; I < Size; ++I) {
736+
TargetInstrInfo::RegSubRegPair X1, Y1;
737+
X1 = getSubRegForIndex(X, Xsub, I);
738+
Y1 = getSubRegForIndex(Y, Ysub, I);
739+
MachineBasicBlock &MBB = *MovT.getParent();
745740
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
746-
TII->get(AMDGPU::V_SWAP_B16))
747-
.addDef(X)
748-
.addDef(Y)
749-
.addReg(Y)
750-
.addReg(X)
751-
.getInstr();
752-
Swaps.push_back(MIB);
753-
} else {
754-
assert(Size > 0 && Size % 4 == 0);
755-
for (unsigned I = 0; I < Size / 4; ++I) {
756-
TargetInstrInfo::RegSubRegPair X1, Y1;
757-
X1 = getSubRegForIndex(X, Xsub, I);
758-
Y1 = getSubRegForIndex(Y, Ysub, I);
759-
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
760-
TII->get(AMDGPU::V_SWAP_B32))
761-
.addDef(X1.Reg, 0, X1.SubReg)
762-
.addDef(Y1.Reg, 0, Y1.SubReg)
763-
.addReg(Y1.Reg, 0, Y1.SubReg)
764-
.addReg(X1.Reg, 0, X1.SubReg)
765-
.getInstr();
766-
Swaps.push_back(MIB);
767-
}
768-
}
769-
// Drop implicit EXEC.
770-
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
771-
for (MachineInstr *Swap : Swaps) {
772-
Swap->removeOperand(Swap->getNumExplicitOperands());
773-
Swap->copyImplicitOps(*MBB.getParent(), *MovX);
741+
TII->get(AMDGPU::V_SWAP_B32))
742+
.addDef(X1.Reg, 0, X1.SubReg)
743+
.addDef(Y1.Reg, 0, Y1.SubReg)
744+
.addReg(Y1.Reg, 0, Y1.SubReg)
745+
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
746+
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
747+
// Drop implicit EXEC.
748+
MIB->removeOperand(MIB->getNumExplicitOperands());
749+
MIB->copyImplicitOps(*MBB.getParent(), *MovX);
774750
}
775751
}
776752
MovX->eraseFromParent();
@@ -857,7 +833,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
857833
}
858834

859835
if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
860-
MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
861836
MI.getOpcode() == AMDGPU::COPY)) {
862837
if (auto *NextMI = matchSwap(MI)) {
863838
Next = NextMI->getIterator();
@@ -1048,7 +1023,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
10481023
MachineFunctionProperties::Property::NoVRegs))
10491024
continue;
10501025

1051-
if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
1026+
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
10521027
!shouldShrinkTrue16(MI))
10531028
continue;
10541029

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -751,7 +751,7 @@ let SubtargetPredicate = isGFX11Plus in {
751751
let IsInvalidSingleUseConsumer = 1;
752752
let IsInvalidSingleUseProducer = 1;
753753
}
754-
defm V_MOV_B16 : VOP1Inst_t16<"v_mov_b16", VOP_I16_I16>;
754+
defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
755755
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
756756
defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
757757
defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,14 +2131,26 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
21312131
; GFX10-NEXT: global_store_short v[2:3], v5, off
21322132
; GFX10-NEXT: s_setpc_b64 s[30:31]
21332133
;
2134-
; GFX11-LABEL: test_store_fpimm:
2135-
; GFX11: ; %bb.0:
2136-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2137-
; GFX11-NEXT: v_mov_b32_e32 v4, 0x3f80
2138-
; GFX11-NEXT: v_mov_b32_e32 v5, 0x4228
2139-
; GFX11-NEXT: global_store_b16 v[0:1], v4, off
2140-
; GFX11-NEXT: global_store_b16 v[2:3], v5, off
2141-
; GFX11-NEXT: s_setpc_b64 s[30:31]
2134+
; GFX11TRUE16-LABEL: test_store_fpimm:
2135+
; GFX11TRUE16: ; %bb.0:
2136+
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2137+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, 0x3f80
2138+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.h, 0x4228
2139+
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2140+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
2141+
; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v4.h
2142+
; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v5, off
2143+
; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v4, off
2144+
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
2145+
;
2146+
; GFX11FAKE16-LABEL: test_store_fpimm:
2147+
; GFX11FAKE16: ; %bb.0:
2148+
; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2149+
; GFX11FAKE16-NEXT: v_mov_b32_e32 v4, 0x3f80
2150+
; GFX11FAKE16-NEXT: v_mov_b32_e32 v5, 0x4228
2151+
; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v4, off
2152+
; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v5, off
2153+
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
21422154
store bfloat 1.0, ptr addrspace(1) %ptr0
21432155
store bfloat 42.0, ptr addrspace(1) %ptr1
21442156
ret void

llvm/test/CodeGen/AMDGPU/fadd.f16.ll

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,9 @@ define amdgpu_kernel void @fadd_f16_imm_a(
246246
; GFX11-SDAG-NEXT: s_mov_b32 s3, s7
247247
; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
248248
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
249-
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
249+
; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x3c00
250+
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
251+
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
250252
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
251253
; GFX11-SDAG-NEXT: s_nop 0
252254
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -262,7 +264,9 @@ define amdgpu_kernel void @fadd_f16_imm_a(
262264
; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
263265
; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
264266
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
265-
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
267+
; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, 0x3c00
268+
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
269+
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
266270
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
267271
; GFX11-GISEL-NEXT: s_nop 0
268272
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -386,7 +390,9 @@ define amdgpu_kernel void @fadd_f16_imm_b(
386390
; GFX11-SDAG-NEXT: s_mov_b32 s3, s7
387391
; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
388392
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
389-
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
393+
; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x4000
394+
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
395+
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
390396
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
391397
; GFX11-SDAG-NEXT: s_nop 0
392398
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -402,7 +408,9 @@ define amdgpu_kernel void @fadd_f16_imm_b(
402408
; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
403409
; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
404410
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
405-
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
411+
; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, 0x4000
412+
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
413+
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
406414
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
407415
; GFX11-GISEL-NEXT: s_nop 0
408416
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

llvm/test/CodeGen/AMDGPU/v_swap_b16.ll

Lines changed: 0 additions & 110 deletions
This file was deleted.

llvm/test/MC/AMDGPU/gfx11_asm_err.s

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,21 @@ s_load_b96 s[20:22], s[2:3], s0
169169

170170
s_buffer_load_b96 s[20:22], s[4:7], s0
171171
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
172+
173+
v_mov_b16 v0.l, s0.h
174+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
175+
176+
v_mov_b16 v0.l, ttmp0.h
177+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
178+
179+
v_mov_b16 v0.l, a0.h
180+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
181+
182+
v_mov_b16 v0.l, s0.h
183+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
184+
185+
v_mov_b16 v0.l, ttmp0.h
186+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
187+
188+
v_mov_b16 v0.l, a0.h
189+
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

llvm/test/MC/AMDGPU/gfx11_asm_t16_err.s

Lines changed: 0 additions & 10 deletions
This file was deleted.

0 commit comments

Comments
 (0)