Skip to content

[WebAssembly] Implement trunc_sat and convert instructions for f16x8. #95180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
brendandahl merged 1 commit into llvm:main
Jun 25, 2024

Conversation

brendandahl
Copy link
Contributor

@llvmbot llvmbot added the "backend:WebAssembly" and "mc" (Machine (object) code) labels Jun 11, 2024
@brendandahl brendandahl requested review from dschuff and aheejin June 11, 2024 23:15
@llvmbot
Copy link
Member

llvmbot commented Jun 11, 2024

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-webassembly

Author: Brendan Dahl (brendandahl)

Changes

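These instructions can be generated from the regular LLVM IR conversion instructions (fptosi, fptoui, sitofp, uitofp) on <8 x half> / <8 x i16> vectors; no WebAssembly-specific intrinsics are needed.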

Specified at:
https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md


Full diff: https://github.com/llvm/llvm-project/pull/95180.diff

3 Files Affected:

  • (modified) llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td (+11-2)
  • (modified) llvm/test/CodeGen/WebAssembly/half-precision.ll (+36)
  • (modified) llvm/test/MC/WebAssembly/simd-encodings.s (+12)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 3888175efd115..2ee430c88169d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1320,16 +1320,23 @@ def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))),
 //===----------------------------------------------------------------------===//
 
 multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
-                       bits<32> simdop> {
+                       bits<32> simdop, list<Predicate> reqs = []> {
   defm op#_#vec :
     SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
            [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
-           vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
+           vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop, reqs>;
+}
+
+multiclass HalfPrecisionConvert<Vec vec, Vec arg, SDPatternOperator op,
+                                string name, bits<32> simdop> {
+  defm "" : SIMDConvert<vec, arg, op, name, simdop, [HasHalfPrecision]>;
 }
 
 // Floating point to integer with saturation: trunc_sat
 defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
 defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
+defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_sint, "trunc_sat_f16x8_s", 0x148>;
+defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_uint, "trunc_sat_f16x8_u", 0x149>;
 
 // Support the saturating variety as well.
 def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>;
@@ -1355,6 +1362,8 @@ defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
 defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
 defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
 defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
+defm "" : HalfPrecisionConvert<F16x8, I16x8, sint_to_fp, "convert_i16x8_s", 0x14a>;
+defm "" : HalfPrecisionConvert<F16x8, I16x8, uint_to_fp, "convert_i16x8_u", 0x14b>;
 
 // Extending operations
 // TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 0f0a159091514..fa78f5f9591d6 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -246,3 +246,39 @@ define <8 x half> @nearest_v8f16_via_roundeven(<8 x half> %a) {
   %v = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
   ret <8 x half> %v
 }
+
+define <8 x half> @convert_s_v8f16(<8 x i16> %x) {
+; CHECK-LABEL: convert_s_v8f16:
+; CHECK:         .functype convert_s_v8f16 (v128) -> (v128)
+; CHECK-NEXT:    f16x8.convert_i16x8_s $push0=, $0
+; CHECK-NEXT:    return $pop[[R]]{{$}}
+  %a = sitofp <8 x i16> %x to <8 x half>
+  ret <8 x half> %a
+}
+
+define <8 x half> @convert_u_v8f16(<8 x i16> %x) {
+; CHECK-LABEL: convert_u_v8f16:
+; CHECK:         .functype convert_u_v8f16 (v128) -> (v128)
+; CHECK-NEXT:    f16x8.convert_i16x8_u $push0=, $0
+; CHECK-NEXT:    return $pop[[R]]{{$}}
+  %a = uitofp <8 x i16> %x to <8 x half>
+  ret <8 x half> %a
+}
+
+define <8 x i16> @trunc_sat_s_v8i16(<8 x half> %x) {
+; CHECK-LABEL: trunc_sat_s_v8i16:
+; CHECK:         .functype trunc_sat_s_v8i16 (v128) -> (v128)
+; CHECK-NEXT:    i16x8.trunc_sat_f16x8_s $push0=, $0
+; CHECK-NEXT:    return $pop[[R]]{{$}}
+  %a = fptosi <8 x half> %x to <8 x i16>
+  ret <8 x i16> %a
+}
+
+define <8 x i16> @trunc_sat_u_v8i16(<8 x half> %x) {
+; CHECK-LABEL: trunc_sat_u_v8i16:
+; CHECK:         .functype trunc_sat_u_v8i16 (v128) -> (v128)
+; CHECK-NEXT:    i16x8.trunc_sat_f16x8_u $push0=, $0
+; CHECK-NEXT:    return $pop[[R]]{{$}}
+  %a = fptoui <8 x half> %x to <8 x i16>
+  ret <8 x i16> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 88c91be9263da..8c3483bfaad7a 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -920,4 +920,16 @@ main:
     # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xc7,0x02]
     f16x8.relaxed_nmadd
 
+    # CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc8,0x02]
+    i16x8.trunc_sat_f16x8_s
+
+    # CHECK: i16x8.trunc_sat_f16x8_u # encoding: [0xfd,0xc9,0x02]
+    i16x8.trunc_sat_f16x8_u
+
+    # CHECK: f16x8.convert_i16x8_s # encoding: [0xfd,0xca,0x02]
+    f16x8.convert_i16x8_s
+
+    # CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xcb,0x02]
+    f16x8.convert_i16x8_u
+
     end_function

@brendandahl brendandahl merged commit 928b780 into llvm:main Jun 25, 2024
10 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jun 25, 2024

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime running on omp-vega20-1 while building llvm at step 10 "Add check check-offload".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/663

Here is the relevant piece of the build log for the reference:

Step 10 (Add check check-offload) failure: 1200 seconds without output running [b'ninja', b'-j 32', b'check-offload'], attempting to kill
...
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/offloading_success.cpp (709 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/workshare_chunk.c (710 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/dynamic_module.c (711 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/target_memcpy.c (712 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/target_memcpy_emi.c (713 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy.c (714 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_data.c (715 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_disallow_both.c (716 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_emi.c (717 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_emi_map.c (718 of 759)
UNSUPPORTED: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_map.c (719 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/target-tile.c (720 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/static_linking.c (721 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/target_constexpr_mapping.cpp (722 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/target-teams-atomic.c (723 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/target_depend_nowait.cpp (724 of 759)
PASS: libomptarget :: amdgcn-amd-amdhsa :: offloading/parallel_target_teams_reduction_max.cpp (725 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/shared_lib_fp_mapping.c (726 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/task_in_reduction_target.c (727 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/associate_ptr.c (728 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/weak.c (729 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/thread_state_2.c (730 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: mapping/lambda_mapping.cpp (731 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/api.c (732 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/multiple_reductions_simple.c (733 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_no_device_init.c (734 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: ompt/veccopy_wrong_return.c (735 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/malloc.c (736 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/close_enter_exit.c (737 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/close_member.c (738 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/requires.c (739 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/close_modifier.c (740 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/close_manual.c (741 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: unified_shared_memory/shared_update.c (742 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/bug51781.c (743 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/target_nowait_target.cpp (744 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/thread_state_1.c (745 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/malloc_parallel.c (746 of 759)
PASS: libomptarget :: amdgcn-amd-amdhsa :: offloading/bug49021.cpp (747 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/bug53727.cpp (748 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/bug47654.cpp (749 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/test_libc.cpp (750 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/wtime.c (751 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/bug49779.cpp (752 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/bug50022.cpp (753 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu :: offloading/bug49021.cpp (754 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu :: offloading/std_complex_arithmetic.cpp (755 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/complex_reduction.cpp (756 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/bug49021.cpp (757 of 759)
PASS: libomptarget :: x86_64-pc-linux-gnu-LTO :: offloading/std_complex_arithmetic.cpp (758 of 759)

AlexisPerry pushed a commit to llvm-project-tlp/llvm-project that referenced this pull request Jul 9, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:WebAssembly mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants