Skip to content

Commit dfde973

Browse files
committed
Revert to the first commit, 2753fd1, which disallows
OPSEL in v_dot4_* and v_dot8_* instructions for all targets. This reverts commit 7ae70f0 and all later commits.
1 parent f1728c7 commit dfde973

File tree

12 files changed

+866
-1369
lines changed

12 files changed

+866
-1369
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4626,40 +4626,28 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
46264626

46274627
uint64_t TSFlags = MII.get(Opc).TSFlags;
46284628

4629-
if (TSFlags & SIInstrFlags::IsDOT) {
4630-
// For all DOT instructions on GFX940, or VOP3P DOT instructions on all
4631-
// targets, i.e. v_dot2_*(except on pre-GFX940), v_dot4_* and v_dot8_*,
4632-
// op_sel must be 0 if present, and op_sel_hi cannot be present.
4633-
if (!isGFX10Plus() && !isGFX940())
4634-
if (Opc == AMDGPU::V_DOT2_F32_F16_vi ||
4635-
Opc == AMDGPU::V_DOT2_I32_I16_vi || Opc == AMDGPU::V_DOT2_U32_U16_vi)
4636-
return true;
4637-
4638-
if (isGFX940() || (TSFlags & SIInstrFlags::VOP3P)) {
4639-
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4640-
if (OpSelIdx != -1) {
4641-
if (Inst.getOperand(OpSelIdx).getImm() != 0)
4642-
return false;
4643-
}
4644-
int OpSelHiIdx =
4645-
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4646-
if (OpSelHiIdx != -1) {
4647-
// -1 is the default value for op_sel_hi
4648-
if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4649-
return false;
4650-
}
4629+
if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4630+
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4631+
if (OpSelIdx != -1) {
4632+
if (Inst.getOperand(OpSelIdx).getImm() != 0)
4633+
return false;
46514634
}
4652-
4653-
// op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4654-
if (isGFX11Plus() && (TSFlags & SIInstrFlags::VOP3) &&
4655-
!(TSFlags & SIInstrFlags::VOP3P)) {
4656-
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4657-
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4658-
if (OpSel & 3)
4635+
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4636+
if (OpSelHiIdx != -1) {
4637+
if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
46594638
return false;
46604639
}
46614640
}
46624641

4642+
// op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4643+
if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4644+
(TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4645+
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4646+
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4647+
if (OpSel & 3)
4648+
return false;
4649+
}
4650+
46634651
return true;
46644652
}
46654653

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,16 +405,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
405405

406406
let OtherPredicates = [HasDot7Insts] in {
407407
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
408-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
408+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
409409
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
410-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
410+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
411411
} // End OtherPredicates = [HasDot7Insts]
412412

413413
let OtherPredicates = [HasDot1Insts] in {
414414
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
415-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
415+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
416416
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
417-
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
417+
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
418418
} // End OtherPredicates = [HasDot1Insts]
419419

420420
def DOT2_BF16_Profile
@@ -433,7 +433,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
433433

434434
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
435435
let IsDOT = 1 in
436-
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
436+
defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
437437
null_frag, 1>;
438438
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
439439
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.

llvm/lib/Target/AMDGPU/VOPInstructions.td

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,12 +1454,13 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
14541454
bit IsMAI = MAI;
14551455
}
14561456

1457-
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1458-
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1459-
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1460-
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1461-
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
1462-
def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;
1457+
def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
1458+
def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
1459+
def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
1460+
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
1461+
def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>;
1462+
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
1463+
def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;
14631464

14641465
// Packed is misleading, but it enables the appropriate op_sel
14651466
// modifiers.

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write
746746
body: |
747747
bb.0:
748748
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
749-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
749+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
750750
...
751751
# GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write
752752
# GCN: V_MFMA
@@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc
945945
body: |
946946
bb.0:
947947
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
948-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
948+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
949949
...
950950
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
951951
# GCN: V_DOT
@@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write
955955
body: |
956956
bb.0:
957957
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
958-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
958+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
959959
...
960960
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
961961
# GCN: V_DOT

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,7 +1071,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write
10711071
body: |
10721072
bb.0:
10731073
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
1074-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1074+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
10751075
...
10761076
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
10771077
# GCN: V_MFMA
@@ -1265,7 +1265,7 @@ name: dot_write_vgpr_different_dot_read_srcc
12651265
body: |
12661266
bb.0:
12671267
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1268-
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1268+
$vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
12691269
...
12701270
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
12711271
# GCN: V_DOT
@@ -1275,7 +1275,7 @@ name: dot_write_vgpr_different_dot_write
12751275
body: |
12761276
bb.0:
12771277
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
1278-
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1278+
$vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
12791279
...
12801280
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
12811281
# GCN: V_DOT

llvm/test/MC/AMDGPU/dl-insts.s

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -536,14 +536,6 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
536536
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
537537
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
538538
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
539-
// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
540-
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
541-
// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
542-
v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
543-
// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
544-
v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
545-
// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
546-
v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
547539

548540
//
549541
// Test clamp.

0 commit comments

Comments
 (0)