Skip to content

Commit f1728c7

Browse files
committed
For pre-GFX940 targets, v_dot2_* allow opsel.
1 parent 82f0f7c commit f1728c7

File tree

4 files changed

+172
-438
lines changed

4 files changed

+172
-438
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4626,32 +4626,40 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
46264626

46274627
uint64_t TSFlags = MII.get(Opc).TSFlags;
46284628

4629-
// For DOT instructions on GFX940, or VOP3P DOT instructions on all targets,
4630-
// op_sel must be 0 if present, and op_sel_hi cannot be present.
4631-
if ((TSFlags & SIInstrFlags::IsDOT) &&
4632-
(isGFX940() || (TSFlags & SIInstrFlags::VOP3P))) {
4633-
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4634-
if (OpSelIdx != -1) {
4635-
if (Inst.getOperand(OpSelIdx).getImm() != 0)
4636-
return false;
4629+
if (TSFlags & SIInstrFlags::IsDOT) {
4630+
// For all DOT instructions on GFX940, or VOP3P DOT instructions on all
4631+
// targets, i.e. v_dot2_*(except on pre-GFX940), v_dot4_* and v_dot8_*,
4632+
// op_sel must be 0 if present, and op_sel_hi cannot be present.
4633+
if (!isGFX10Plus() && !isGFX940())
4634+
if (Opc == AMDGPU::V_DOT2_F32_F16_vi ||
4635+
Opc == AMDGPU::V_DOT2_I32_I16_vi || Opc == AMDGPU::V_DOT2_U32_U16_vi)
4636+
return true;
4637+
4638+
if (isGFX940() || (TSFlags & SIInstrFlags::VOP3P)) {
4639+
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4640+
if (OpSelIdx != -1) {
4641+
if (Inst.getOperand(OpSelIdx).getImm() != 0)
4642+
return false;
4643+
}
4644+
int OpSelHiIdx =
4645+
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4646+
if (OpSelHiIdx != -1) {
4647+
// -1 is the default value for op_sel_hi
4648+
if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4649+
return false;
4650+
}
46374651
}
4638-
int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4639-
if (OpSelHiIdx != -1) {
4640-
// -1 is the default value for op_sel_hi
4641-
if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4652+
4653+
// op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4654+
if (isGFX11Plus() && (TSFlags & SIInstrFlags::VOP3) &&
4655+
!(TSFlags & SIInstrFlags::VOP3P)) {
4656+
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4657+
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4658+
if (OpSel & 3)
46424659
return false;
46434660
}
46444661
}
46454662

4646-
// op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4647-
if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4648-
(TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4649-
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4650-
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4651-
if (OpSel & 3)
4652-
return false;
4653-
}
4654-
46554663
return true;
46564664
}
46574665

llvm/test/MC/AMDGPU/dl-insts.s

Lines changed: 139 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,15 +389,153 @@ v_dot8_i32_i4 v0, v1, v2, v3
389389
v_dot8_u32_u4 v0, v1, v2, v3
390390

391391
//
392-
// Test op_sel/op_sel_hi: in VOP3P dot, op_sel must be 0, op_sel_hi cannot appear
392+
// Test op_sel/op_sel_hi.
393393
//
394394

395395
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
396396
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0]
397+
// CHECK: encoding: [0x00,0x50,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
398+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1]
399+
// CHECK: encoding: [0x00,0x48,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
400+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0]
401+
// CHECK: encoding: [0x00,0x58,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
402+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1]
403+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
404+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0]
405+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
406+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,1]
407+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
408+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,0]
409+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
410+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,1]
411+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
412+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
413+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
414+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
415+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
416+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
417+
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
418+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
419+
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x04]
420+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
421+
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x14]
422+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
423+
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
424+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
425+
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
426+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
427+
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x04]
428+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
429+
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x14]
430+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
431+
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
432+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
433+
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
434+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
435+
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x04]
436+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
437+
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x14]
438+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
439+
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
440+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
441+
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
442+
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
397443
// CHECK: encoding: [0x00,0x40,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
398444
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0]
445+
// CHECK: encoding: [0x00,0x50,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
446+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1]
447+
// CHECK: encoding: [0x00,0x48,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
448+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0]
449+
// CHECK: encoding: [0x00,0x58,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
450+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1]
451+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
452+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0]
453+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
454+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,1]
455+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
456+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,0]
457+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
458+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,1]
459+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
460+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
461+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
462+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
463+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
464+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
465+
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
466+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
467+
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x04]
468+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
469+
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x14]
470+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
471+
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
472+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
473+
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
474+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
475+
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x04]
476+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
477+
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x14]
478+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
479+
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
480+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
481+
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
482+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
483+
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x04]
484+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
485+
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x14]
486+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
487+
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
488+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
489+
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
490+
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
399491
// CHECK: encoding: [0x00,0x40,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
400492
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0]
493+
// CHECK: encoding: [0x00,0x50,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
494+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1]
495+
// CHECK: encoding: [0x00,0x48,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
496+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0]
497+
// CHECK: encoding: [0x00,0x58,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
498+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1]
499+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
500+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0]
501+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
502+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,1]
503+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
504+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,0]
505+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
506+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,1]
507+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
508+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
509+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
510+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
511+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
512+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
513+
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
514+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
515+
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x04]
516+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
517+
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x14]
518+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
519+
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
520+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
521+
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
522+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
523+
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x04]
524+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
525+
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x14]
526+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
527+
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
528+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
529+
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
530+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
531+
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x04]
532+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
533+
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x14]
534+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
535+
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
536+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
537+
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
538+
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
401539
// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
402540
v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
403541
// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]

0 commit comments

Comments
 (0)