Skip to content

[AMDGPU] Add new 64-bit SALU instructions #74449

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,20 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
} // End isCommutable = 1
} // End Defs = [SCC]

// 64-bit scalar ALU add / sub / mul pseudo-instructions. Everything in this
// group is gated on the isGFX12Plus subtarget predicate.
// NOTE(review): these defs sit after the closing brace of the preceding
// "Defs = [SCC]" region, so they are not covered by that `let`; presumably
// they do not write SCC — confirm against the SOP2_64 class and the ISA doc.
let SubtargetPredicate = isGFX12Plus in {
// 64-bit add. No pattern list is passed here, and the operation is marked
// commutable. The GFX12 real encoding for this pseudo is declared elsewhere in
// this file under the mnemonic "s_add_nc_u64".
def S_ADD_U64 : SOP2_64<"s_add_u64">{
let isCommutable = 1;
}

// 64-bit subtract. No pattern list is passed here and it is not marked
// commutable. Its GFX12 real encoding uses the mnemonic "s_sub_nc_u64".
def S_SUB_U64 : SOP2_64<"s_sub_u64">;

// 64-bit multiply. Carries a pattern that matches an i64 `mul` wrapped in
// UniformBinFrag and writes the i64 result to $sdst; marked commutable.
// NOTE(review): UniformBinFrag is defined outside this view; presumably it
// restricts the match to uniform (non-divergent) nodes — confirm.
def S_MUL_U64 : SOP2_64 <"s_mul_u64",
[(set i64:$sdst, (UniformBinFrag<mul> i64:$src0, i64:$src1))]> {
let isCommutable = 1;
}

} // End SubtargetPredicate = isGFX12Plus

def SelectPat : PatFrag <
(ops node:$src1, node:$src2),
(select SCC, $src1, $src2),
Expand Down Expand Up @@ -2072,6 +2086,9 @@ defm S_MUL_HI_I32 : SOP2_Real_gfx11_gfx12<0x02e>;
defm S_CSELECT_B32 : SOP2_Real_gfx11_gfx12<0x030>;
defm S_CSELECT_B64 : SOP2_Real_gfx11_gfx12<0x031>;
defm S_PACK_HL_B32_B16 : SOP2_Real_gfx11_gfx12<0x035>;
// GFX12 real encodings for the 64-bit add/sub pseudos (opcodes 0x053/0x054).
// They are emitted under the renamed "s_add_nc_u64"/"s_sub_nc_u64" mnemonics
// while being backed by the S_ADD_U64/S_SUB_U64 pseudos; the old
// "s_add_u64"/"s_sub_u64" spellings are still expected to assemble to the same
// encodings (exercised by gfx12_asm_sop2_alias.s).
defm S_ADD_NC_U64 : SOP2_Real_Renamed_gfx12<0x053, S_ADD_U64, "s_add_nc_u64">;
defm S_SUB_NC_U64 : SOP2_Real_Renamed_gfx12<0x054, S_SUB_U64, "s_sub_nc_u64">;
Comment on lines +2089 to +2090
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...again?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, GFX12 is adding "NC" to the SALU name to match the VALU equivalents. The old versions without NC (like S_ADD_U64) are still accepted for compatibility.

// GFX12 real encoding for the 64-bit multiply (opcode 0x055); unlike the
// add/sub entries, it is not declared through the Renamed multiclass.
defm S_MUL_U64 : SOP2_Real_gfx12<0x055>;

//===----------------------------------------------------------------------===//
// SOP2 - GFX1150, GFX12
Expand Down
234 changes: 234 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_sop2.s
Original file line number Diff line number Diff line change
@@ -1,5 +1,239 @@
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck -check-prefix=GFX12 %s

// s_add_nc_u64 encoding tests: SGPR pairs, exec/vcc, inline constants and
// 32-bit literals are tried in the destination and in each source position.
s_add_nc_u64 s[0:1], s[2:3], s[4:5]
// GFX12: encoding: [0x02,0x04,0x80,0xa9]

s_add_nc_u64 s[100:101], s[102:103], s[104:105]
// GFX12: encoding: [0x66,0x68,0xe4,0xa9]

s_add_nc_u64 s[0:1], s[104:105], s[102:103]
// GFX12: encoding: [0x68,0x66,0x80,0xa9]

s_add_nc_u64 s[104:105], s[0:1], s[102:103]
// GFX12: encoding: [0x00,0x66,0xe8,0xa9]

s_add_nc_u64 s[104:105], s[102:103], s[2:3]
// GFX12: encoding: [0x66,0x02,0xe8,0xa9]

s_add_nc_u64 s[104:105], s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0xe8,0xa9]

s_add_nc_u64 s[0:1], s[102:103], s[2:3]
// GFX12: encoding: [0x66,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], s[102:103]
// GFX12: encoding: [0x02,0x66,0x80,0xa9]

s_add_nc_u64 exec, s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0xfe,0xa9]

s_add_nc_u64 vcc, s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0xea,0xa9]

s_add_nc_u64 s[0:1], exec, s[2:3]
// GFX12: encoding: [0x7e,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], vcc, s[2:3]
// GFX12: encoding: [0x6a,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], 0, s[2:3]
// GFX12: encoding: [0x80,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], -1, s[2:3]
// GFX12: encoding: [0xc1,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], 0.5, s[2:3]
// GFX12: encoding: [0xf0,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], -4.0, s[2:3]
// GFX12: encoding: [0xf7,0x02,0x80,0xa9]

s_add_nc_u64 s[0:1], 0x3f717273, s[2:3]
// GFX12: encoding: [0xff,0x02,0x80,0xa9,0x73,0x72,0x71,0x3f]

s_add_nc_u64 s[0:1], 0xaf123456, s[2:3]
// GFX12: encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf]

s_add_nc_u64 s[0:1], s[2:3], exec
// GFX12: encoding: [0x02,0x7e,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], vcc
// GFX12: encoding: [0x02,0x6a,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], 0
// GFX12: encoding: [0x02,0x80,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], -1
// GFX12: encoding: [0x02,0xc1,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], 0.5
// GFX12: encoding: [0x02,0xf0,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], -4.0
// GFX12: encoding: [0x02,0xf7,0x80,0xa9]

s_add_nc_u64 s[0:1], s[2:3], 0x3f717273
// GFX12: encoding: [0x02,0xff,0x80,0xa9,0x73,0x72,0x71,0x3f]

s_add_nc_u64 s[0:1], s[2:3], 0xaf123456
// GFX12: encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf]

// s_sub_nc_u64 encoding tests: same operand matrix as the s_add_nc_u64 group
// (SGPR pairs, exec/vcc, inline constants, 32-bit literals).
s_sub_nc_u64 s[0:1], s[2:3], s[4:5]
// GFX12: encoding: [0x02,0x04,0x00,0xaa]

s_sub_nc_u64 s[100:101], s[102:103], s[104:105]
// GFX12: encoding: [0x66,0x68,0x64,0xaa]

s_sub_nc_u64 s[0:1], s[104:105], s[102:103]
// GFX12: encoding: [0x68,0x66,0x00,0xaa]

s_sub_nc_u64 s[104:105], s[0:1], s[102:103]
// GFX12: encoding: [0x00,0x66,0x68,0xaa]

s_sub_nc_u64 s[104:105], s[102:103], s[2:3]
// GFX12: encoding: [0x66,0x02,0x68,0xaa]

s_sub_nc_u64 s[104:105], s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0x68,0xaa]

s_sub_nc_u64 s[0:1], s[102:103], s[2:3]
// GFX12: encoding: [0x66,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], s[102:103]
// GFX12: encoding: [0x02,0x66,0x00,0xaa]

s_sub_nc_u64 exec, s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0x7e,0xaa]

s_sub_nc_u64 vcc, s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0x6a,0xaa]

s_sub_nc_u64 s[0:1], exec, s[2:3]
// GFX12: encoding: [0x7e,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], vcc, s[2:3]
// GFX12: encoding: [0x6a,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], 0, s[2:3]
// GFX12: encoding: [0x80,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], -1, s[2:3]
// GFX12: encoding: [0xc1,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], 0.5, s[2:3]
// GFX12: encoding: [0xf0,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], -4.0, s[2:3]
// GFX12: encoding: [0xf7,0x02,0x00,0xaa]

s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3]
// GFX12: encoding: [0xff,0x02,0x00,0xaa,0x73,0x72,0x71,0x3f]

s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3]
// GFX12: encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf]

s_sub_nc_u64 s[0:1], s[2:3], exec
// GFX12: encoding: [0x02,0x7e,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], vcc
// GFX12: encoding: [0x02,0x6a,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], 0
// GFX12: encoding: [0x02,0x80,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], -1
// GFX12: encoding: [0x02,0xc1,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], 0.5
// GFX12: encoding: [0x02,0xf0,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], -4.0
// GFX12: encoding: [0x02,0xf7,0x00,0xaa]

s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273
// GFX12: encoding: [0x02,0xff,0x00,0xaa,0x73,0x72,0x71,0x3f]

s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456
// GFX12: encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf]

// s_mul_u64 encoding tests: same operand matrix as the add/sub groups
// (SGPR pairs, exec/vcc, inline constants, 32-bit literals).
s_mul_u64 s[0:1], s[2:3], s[4:5]
// GFX12: encoding: [0x02,0x04,0x80,0xaa]

s_mul_u64 s[100:101], s[102:103], s[104:105]
// GFX12: encoding: [0x66,0x68,0xe4,0xaa]

s_mul_u64 s[0:1], s[104:105], s[102:103]
// GFX12: encoding: [0x68,0x66,0x80,0xaa]

s_mul_u64 s[104:105], s[0:1], s[102:103]
// GFX12: encoding: [0x00,0x66,0xe8,0xaa]

s_mul_u64 s[104:105], s[102:103], s[2:3]
// GFX12: encoding: [0x66,0x02,0xe8,0xaa]

s_mul_u64 s[104:105], s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0xe8,0xaa]

s_mul_u64 s[0:1], s[102:103], s[2:3]
// GFX12: encoding: [0x66,0x02,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], s[102:103]
// GFX12: encoding: [0x02,0x66,0x80,0xaa]

s_mul_u64 exec, s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0xfe,0xaa]

s_mul_u64 vcc, s[0:1], s[2:3]
// GFX12: encoding: [0x00,0x02,0xea,0xaa]

s_mul_u64 s[0:1], exec, s[2:3]
// GFX12: encoding: [0x7e,0x02,0x80,0xaa]

s_mul_u64 s[0:1], vcc, s[2:3]
// GFX12: encoding: [0x6a,0x02,0x80,0xaa]

s_mul_u64 s[0:1], 0, s[2:3]
// GFX12: encoding: [0x80,0x02,0x80,0xaa]

s_mul_u64 s[0:1], -1, s[2:3]
// GFX12: encoding: [0xc1,0x02,0x80,0xaa]

s_mul_u64 s[0:1], 0.5, s[2:3]
// GFX12: encoding: [0xf0,0x02,0x80,0xaa]

s_mul_u64 s[0:1], -4.0, s[2:3]
// GFX12: encoding: [0xf7,0x02,0x80,0xaa]

s_mul_u64 s[0:1], 0x3f717273, s[2:3]
// GFX12: encoding: [0xff,0x02,0x80,0xaa,0x73,0x72,0x71,0x3f]

s_mul_u64 s[0:1], 0xaf123456, s[2:3]
// GFX12: encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf]

s_mul_u64 s[0:1], s[2:3], exec
// GFX12: encoding: [0x02,0x7e,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], vcc
// GFX12: encoding: [0x02,0x6a,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], 0
// GFX12: encoding: [0x02,0x80,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], -1
// GFX12: encoding: [0x02,0xc1,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], 0.5
// GFX12: encoding: [0x02,0xf0,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], -4.0
// GFX12: encoding: [0x02,0xf7,0x80,0xaa]

s_mul_u64 s[0:1], s[2:3], 0x3f717273
// GFX12: encoding: [0x02,0xff,0x80,0xaa,0x73,0x72,0x71,0x3f]

s_mul_u64 s[0:1], s[2:3], 0xaf123456
// GFX12: encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf]

s_add_f32 s5, s1, s2
// GFX12: encoding: [0x01,0x02,0x05,0xa0]

Expand Down
6 changes: 6 additions & 0 deletions llvm/test/MC/AMDGPU/gfx12_asm_sop2_alias.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ s_add_i32 s0, s1, s2
s_add_u32 s0, s1, s2
// GFX12: encoding: [0x01,0x02,0x00,0x80]

s_add_u64 s[0:1], s[2:3], s[4:5]
// GFX12: encoding: [0x02,0x04,0x80,0xa9]

s_addc_u32 s0, s1, s2
// GFX12: encoding: [0x01,0x02,0x00,0x82]

Expand All @@ -15,6 +18,9 @@ s_sub_i32 s0, s1, s2
s_sub_u32 s0, s1, s2
// GFX12: encoding: [0x01,0x02,0x80,0x80]

s_sub_u64 s[0:1], s[2:3], s[4:5]
// GFX12: encoding: [0x02,0x04,0x00,0xaa]

s_subb_u32 s0, s1, s2
// GFX12: encoding: [0x01,0x02,0x80,0x82]

Expand Down
Loading