Skip to content

Commit 6e7b45c

Browse files
jwanggit86 and Jun Wang authored
[AMDGPU][MC] Support tfe operand in image_atomic instructions (#92469)
Currently, if an image_atomic instruction has the 'tfe' operand, the llvm-mc assembler generally rejects it. The only exception is when dmask is 0x1 and the instruction is not image_atomic_cmpswap (e.g., image_atomic_add v[5:6], v252, s[8:15] dmask:0x1 tfe). This patch fixes the problem and allows tfe to be specified in image_atomic instructions. --------- Co-authored-by: Jun Wang <[email protected]>
1 parent 11d7203 commit 6e7b45c

File tree

5 files changed

+316
-1
lines changed

5 files changed

+316
-1
lines changed

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,10 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
11011101
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
11021102
let VDataDwords = !if(isCmpSwap, 4, 2) in
11031103
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
1104+
let VDataDwords = !if(isCmpSwap, 2, 2) in
1105+
defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
1106+
let VDataDwords = !if(isCmpSwap, 4, 4) in
1107+
defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
11041108
}
11051109
} // End IsAtomicRet = 1
11061110
}

llvm/test/MC/AMDGPU/gfx10_asm_mimg.s

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,3 +654,94 @@ image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dma
654654

655655
image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
656656
; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x40]
657+
658+
; Test dmask + tfe for image_atomic instructions
659+
image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D
660+
; GFX10: image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x44,0xf0,0x0a,0x00,0x04,0x00]
661+
662+
image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
663+
; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x45,0xf0,0x0a,0x00,0x04,0x00]
664+
665+
image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
666+
; GFX10: image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x03,0x44,0xf0,0x0a,0x00,0x04,0x00]
667+
668+
image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
669+
; GFX10: image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x45,0xf0,0x0a,0x00,0x04,0x00]
670+
671+
image_atomic_swap v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
672+
; GFX10: image_atomic_swap v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x3d,0xf0,0x0a,0x00,0x04,0x00]
673+
674+
image_atomic_swap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
675+
; GFX10: image_atomic_swap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x3d,0xf0,0x0a,0x00,0x04,0x00]
676+
677+
image_atomic_sub v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
678+
; GFX10: image_atomic_sub v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x49,0xf0,0x0a,0x00,0x04,0x00]
679+
680+
image_atomic_sub v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
681+
; GFX10: image_atomic_sub v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x49,0xf0,0x0a,0x00,0x04,0x00]
682+
683+
image_atomic_smin v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
684+
; GFX10: image_atomic_smin v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x51,0xf0,0x0a,0x00,0x04,0x00]
685+
686+
image_atomic_smin v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
687+
; GFX10: image_atomic_smin v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x51,0xf0,0x0a,0x00,0x04,0x00]
688+
689+
image_atomic_umin v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
690+
; GFX10: image_atomic_umin v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x55,0xf0,0x0a,0x00,0x04,0x00]
691+
692+
image_atomic_umin v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
693+
; GFX10: image_atomic_umin v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x55,0xf0,0x0a,0x00,0x04,0x00]
694+
695+
image_atomic_smax v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
696+
; GFX10: image_atomic_smax v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x59,0xf0,0x0a,0x00,0x04,0x00]
697+
698+
image_atomic_smax v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
699+
; GFX10: image_atomic_smax v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x59,0xf0,0x0a,0x00,0x04,0x00]
700+
701+
image_atomic_umax v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
702+
; GFX10: image_atomic_umax v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x5d,0xf0,0x0a,0x00,0x04,0x00]
703+
704+
image_atomic_umax v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
705+
; GFX10: image_atomic_umax v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x5d,0xf0,0x0a,0x00,0x04,0x00]
706+
707+
image_atomic_and v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
708+
; GFX10: image_atomic_and v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x61,0xf0,0x0a,0x00,0x04,0x00]
709+
710+
image_atomic_and v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
711+
; GFX10: image_atomic_and v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x61,0xf0,0x0a,0x00,0x04,0x00]
712+
713+
image_atomic_or v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
714+
; GFX10: image_atomic_or v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x65,0xf0,0x0a,0x00,0x04,0x00]
715+
716+
image_atomic_or v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
717+
; GFX10: image_atomic_or v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x65,0xf0,0x0a,0x00,0x04,0x00]
718+
719+
image_atomic_xor v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
720+
; GFX10: image_atomic_xor v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x69,0xf0,0x0a,0x00,0x04,0x00]
721+
722+
image_atomic_xor v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
723+
; GFX10: image_atomic_xor v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x69,0xf0,0x0a,0x00,0x04,0x00]
724+
725+
image_atomic_inc v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
726+
; GFX10: image_atomic_inc v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x6d,0xf0,0x0a,0x00,0x04,0x00]
727+
728+
image_atomic_inc v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
729+
; GFX10: image_atomic_inc v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x6d,0xf0,0x0a,0x00,0x04,0x00]
730+
731+
image_atomic_dec v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
732+
; GFX10: image_atomic_dec v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x01,0x71,0xf0,0x0a,0x00,0x04,0x00]
733+
734+
image_atomic_dec v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
735+
; GFX10: image_atomic_dec v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x71,0xf0,0x0a,0x00,0x04,0x00]
736+
737+
image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
738+
; GFX10: image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x03,0x40,0xf0,0x0a,0x00,0x04,0x00]
739+
740+
image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
741+
; GFX10: image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x03,0x41,0xf0,0x0a,0x00,0x04,0x00]
742+
743+
image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D
744+
; GFX10: image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x40,0xf0,0x0a,0x00,0x04,0x00]
745+
746+
image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
747+
; GFX10: image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x0a,0x00,0x04,0x00]

llvm/test/MC/AMDGPU/gfx11_asm_mimg.s

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5603,3 +5603,94 @@ image_store_pck v1, v[2:3], s[96:103] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA unorm a1
56035603

56045604
image_store_pck v255, v[254:255], ttmp[8:15] dmask:0x4 dim:SQ_RSRC_IMG_2D_MSAA unorm glc slc dlc a16 lwe
56055605
// GFX11: [0x98,0x74,0x21,0xf0,0xfe,0xff,0x5d,0x00]
5606+
5607+
; Test dmask + tfe for image_atomic instructions
5608+
image_atomic_add v0, v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D
5609+
// GFX11: [0x04,0x01,0x30,0xf0,0x0a,0x00,0x04,0x00]
5610+
5611+
image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5612+
// GFX11: [0x04,0x01,0x30,0xf0,0x0a,0x00,0x24,0x00]
5613+
5614+
image_atomic_add v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
5615+
// GFX11: [0x04,0x03,0x30,0xf0,0x0a,0x00,0x04,0x00]
5616+
5617+
image_atomic_add v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5618+
// GFX11: [0x04,0x03,0x30,0xf0,0x0a,0x00,0x24,0x00]
5619+
5620+
image_atomic_swap v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5621+
// GFX11: [0x04,0x01,0x28,0xf0,0x0a,0x00,0x24,0x00]
5622+
5623+
image_atomic_swap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5624+
// GFX11: [0x04,0x03,0x28,0xf0,0x0a,0x00,0x24,0x00]
5625+
5626+
image_atomic_sub v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5627+
// GFX11: [0x04,0x01,0x34,0xf0,0x0a,0x00,0x24,0x00]
5628+
5629+
image_atomic_sub v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5630+
// GFX11: [0x04,0x03,0x34,0xf0,0x0a,0x00,0x24,0x00]
5631+
5632+
image_atomic_smin v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5633+
// GFX11: [0x04,0x01,0x38,0xf0,0x0a,0x00,0x24,0x00]
5634+
5635+
image_atomic_smin v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5636+
// GFX11: [0x04,0x03,0x38,0xf0,0x0a,0x00,0x24,0x00]
5637+
5638+
image_atomic_umin v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5639+
// GFX11: [0x04,0x01,0x3c,0xf0,0x0a,0x00,0x24,0x00]
5640+
5641+
image_atomic_umin v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5642+
// GFX11: [0x04,0x03,0x3c,0xf0,0x0a,0x00,0x24,0x00]
5643+
5644+
image_atomic_smax v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5645+
// GFX11: [0x04,0x01,0x40,0xf0,0x0a,0x00,0x24,0x00]
5646+
5647+
image_atomic_smax v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5648+
// GFX11: [0x04,0x03,0x40,0xf0,0x0a,0x00,0x24,0x00]
5649+
5650+
image_atomic_umax v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5651+
// GFX11: [0x04,0x01,0x44,0xf0,0x0a,0x00,0x24,0x00]
5652+
5653+
image_atomic_umax v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5654+
// GFX11: [0x04,0x03,0x44,0xf0,0x0a,0x00,0x24,0x00]
5655+
5656+
image_atomic_and v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5657+
// GFX11: [0x04,0x01,0x48,0xf0,0x0a,0x00,0x24,0x00]
5658+
5659+
image_atomic_and v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5660+
// GFX11: [0x04,0x03,0x48,0xf0,0x0a,0x00,0x24,0x00]
5661+
5662+
image_atomic_or v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5663+
// GFX11: [0x04,0x01,0x4c,0xf0,0x0a,0x00,0x24,0x00]
5664+
5665+
image_atomic_or v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5666+
// GFX11: [0x04,0x03,0x4c,0xf0,0x0a,0x00,0x24,0x00]
5667+
5668+
image_atomic_xor v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5669+
// GFX11: [0x04,0x01,0x50,0xf0,0x0a,0x00,0x24,0x00]
5670+
5671+
image_atomic_xor v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5672+
// GFX11: [0x04,0x03,0x50,0xf0,0x0a,0x00,0x24,0x00]
5673+
5674+
image_atomic_inc v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5675+
// GFX11: [0x04,0x01,0x54,0xf0,0x0a,0x00,0x24,0x00]
5676+
5677+
image_atomic_inc v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5678+
// GFX11: [0x04,0x03,0x54,0xf0,0x0a,0x00,0x24,0x00]
5679+
5680+
image_atomic_dec v[0:1], v[10:11], s[16:23] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
5681+
// GFX11: [0x04,0x01,0x58,0xf0,0x0a,0x00,0x24,0x00]
5682+
5683+
image_atomic_dec v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5684+
// GFX11: [0x04,0x03,0x58,0xf0,0x0a,0x00,0x24,0x00]
5685+
5686+
image_atomic_cmpswap v[0:1], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D
5687+
// GFX11: [0x04,0x03,0x2c,0xf0,0x0a,0x00,0x04,0x00]
5688+
5689+
image_atomic_cmpswap v[0:2], v[10:11], s[16:23] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
5690+
// GFX11: [0x04,0x03,0x2c,0xf0,0x0a,0x00,0x24,0x00]
5691+
5692+
image_atomic_cmpswap v[0:3], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D
5693+
// GFX11: [0x04,0x0f,0x2c,0xf0,0x0a,0x00,0x04,0x00]
5694+
5695+
image_atomic_cmpswap v[0:4], v[10:11], s[16:23] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
5696+
// GFX11: [0x04,0x0f,0x2c,0xf0,0x0a,0x00,0x24,0x00]

0 commit comments

Comments
 (0)