Skip to content

[AMDGPU] New alias v_interp_p2_new_f32 #118968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 6, 2024
Merged

Conversation

jayfoad
Copy link
Contributor

@jayfoad jayfoad commented Dec 6, 2024

This is for compatibility with SP3. Also add basic testing for the new
GFX11 VINTERP encoding.

This is for compatibility with SP3. Also add basic testing for the new
GFX11 VINTERP encoding.
@llvmbot llvmbot added backend:AMDGPU mc Machine (object) code labels Dec 6, 2024
@llvmbot
Copy link
Member

llvmbot commented Dec 6, 2024

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Jay Foad (jayfoad)

Changes

This is for compatibility with SP3. Also add basic testing for the new
GFX11 VINTERP encoding.


Full diff: https://github.com/llvm/llvm-project/pull/118968.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/VINTERPInstructions.td (+3)
  • (added) llvm/test/MC/AMDGPU/gfx11_asm_vinterp.s (+74)
  • (added) llvm/test/MC/AMDGPU/gfx11_asm_vinterp_alias.s (+5)
  • (added) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vinterp.txt (+74)
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
index fa06d96085820e..1bfb835f04dde4 100644
--- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -238,3 +238,6 @@ defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x002,
 defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x003, "v_interp_p2_f16_f32">;
 defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x004, "v_interp_p10_rtz_f16_f32">;
 defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x005, "v_interp_p2_rtz_f16_f32">;
+
+let AssemblerPredicate = isGFX11Plus in
+def : AMDGPUMnemonicAlias<"v_interp_p2_new_f32", "v_interp_p2_f32">;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp.s b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp.s
new file mode 100644
index 00000000000000..bb44217a836d31
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp.s
@@ -0,0 +1,74 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
+
+v_interp_p10_f32 v0, v1, v2, v3 wait_exp:5
+// GFX11: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f32 v0, -v1, v2, v3
+// GFX11: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p10_f32 v0, v1, -v2, v3
+// GFX11: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p10_f32 v0, v1, v2, -v3
+// GFX11: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84]
+
+v_interp_p2_f32 v0, v1, v2, v3 wait_exp:5
+// GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x01,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p2_f32 v0, -v1, v2, v3
+// GFX11: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p2_f32 v0, v1, -v2, v3
+// GFX11: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p2_f32 v0, v1, v2, -v3
+// GFX11: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84]
+
+v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:5
+// GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x02,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f16_f32 v0, -v1, v2, v3
+// GFX11: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p10_f16_f32 v0, v1, -v2, v3
+// GFX11: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p10_f16_f32 v0, v1, v2, -v3
+// GFX11: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84]
+
+v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:5
+// GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x03,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p2_f16_f32 v0, -v1, v2, v3
+// GFX11: v_interp_p2_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p2_f16_f32 v0, v1, -v2, v3
+// GFX11: v_interp_p2_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p2_f16_f32 v0, v1, v2, -v3
+// GFX11: v_interp_p2_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84]
+
+v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:5
+// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x04,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3
+// GFX11: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3
+// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3
+// GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84]
+
+v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:5
+// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x05,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3
+// GFX11: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3
+// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3
+// GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_alias.s b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_alias.s
new file mode 100644
index 00000000000000..32b5eb14a50922
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_alias.s
@@ -0,0 +1,5 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefix=GFX11 %s
+
+v_interp_p2_new_f32 v0, v1, v2, v3
+// GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vinterp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vinterp.txt
new file mode 100644
index 00000000000000..afdee5b2eeb06c
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vinterp.txt
@@ -0,0 +1,74 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=GFX11 %s
+
+0x00,0x05,0x00,0xcd,0x01,0x05,0x0e,0x04
+# GFX11: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24
+# GFX11: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24]
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44
+# GFX11: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44]
+
+0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84
+# GFX11: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84]
+
+0x00,0x05,0x01,0xcd,0x01,0x05,0x0e,0x04
+# GFX11: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x01,0xcd,0x01,0x05,0x0e,0x04]
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24
+# GFX11: v_interp_p2_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x24]
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44
+# GFX11: v_interp_p2_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x44]
+
+0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84
+# GFX11: v_interp_p2_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x84]
+
+0x00,0x05,0x02,0xcd,0x01,0x05,0x0e,0x04
+# GFX11: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x02,0xcd,0x01,0x05,0x0e,0x04]
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24
+# GFX11: v_interp_p10_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x24]
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44
+# GFX11: v_interp_p10_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x44]
+
+0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84
+# GFX11: v_interp_p10_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x84]
+
+0x00,0x05,0x03,0xcd,0x01,0x05,0x0e,0x04
+# GFX11: v_interp_p2_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x03,0xcd,0x01,0x05,0x0e,0x04]
+
+0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24
+# GFX11: v_interp_p2_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x24]
+
+0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44
+# GFX11: v_interp_p2_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x44]
+
+0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84
+# GFX11: v_interp_p2_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x84]
+
+0x00,0x05,0x04,0xcd,0x01,0x05,0x0e,0x04
+# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x04,0xcd,0x01,0x05,0x0e,0x04]
+
+0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24
+# GFX11: v_interp_p10_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x24]
+
+0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44
+# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x44]
+
+0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84
+# GFX11: v_interp_p10_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x84]
+
+0x00,0x05,0x05,0xcd,0x01,0x05,0x0e,0x04
+# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, v3 wait_exp:5 ; encoding: [0x00,0x05,0x05,0xcd,0x01,0x05,0x0e,0x04]
+
+0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24
+# GFX11: v_interp_p2_rtz_f16_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x24]
+
+0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44
+# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x44]
+
+0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84
+# GFX11: v_interp_p2_rtz_f16_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x84]

@jayfoad jayfoad requested a review from kosarev December 6, 2024 13:37
@@ -238,3 +238,6 @@ defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x002,
defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x003, "v_interp_p2_f16_f32">;
defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x004, "v_interp_p10_rtz_f16_f32">;
defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x005, "v_interp_p2_rtz_f16_f32">;

let AssemblerPredicate = isGFX11Plus in
def : AMDGPUMnemonicAlias<"v_interp_p2_new_f32", "v_interp_p2_f32">;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This alias name is really awful...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Using the same name for an instruction with very different behavior was also a terrible idea, but that's what happened here in GFX11.

@jayfoad jayfoad merged commit 3f3bcac into llvm:main Dec 6, 2024
11 checks passed
@jayfoad jayfoad deleted the v-interp-p2-new-f32 branch December 6, 2024 15:35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants