-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[WebAssembly] Add intrinsics to wasm_simd128.h for all FP16 instructions #106465
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…ons. Getting this to work required a few additional changes: - Add builtins for any instructions that can't be done with plain C currently. - Add support for the saturating version of fp_to_<s,u>_I16x8. Other vector sizes supported this already. - Support bitcast of f16x8 to v128. Needed to return a __f16x8 as v128_t.
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Brendan Dahl (brendandahl) ChangesGetting this to work required a few additional changes:
Patch is 23.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106465.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 034d32c6291b3d..2e80eef2c8b9bc 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -124,6 +124,7 @@ TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_abs_f16x8, "V8hV8h", "nc", "fp16")
TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
@@ -140,6 +141,10 @@ TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "fp16")
TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "fp16")
TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_ceil_f16x8, "V8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_floor_f16x8, "V8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_trunc_f16x8, "V8hV8h", "nc", "fp16")
+TARGET_BUILTIN(__builtin_wasm_nearest_f16x8, "V8hV8h", "nc", "fp16")
TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128")
@@ -151,9 +156,13 @@ TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_sqrt_f16x8, "V8hV8h", "nc", "fp16")
TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i16x8_f16x8, "V8sV8h", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i16x8_f16x8, "V8sV8h", "nc", "simd128")
+
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2a733e4d834cfa..bb5367c29b1c3a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -21208,6 +21208,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
@@ -21219,6 +21220,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
Value *Src = EmitScalarExpr(E->getArg(0));
llvm::Type *ResT = ConvertType(E->getType());
@@ -21266,6 +21268,10 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
+ case WebAssembly::BI__builtin_wasm_ceil_f16x8:
+ case WebAssembly::BI__builtin_wasm_floor_f16x8:
+ case WebAssembly::BI__builtin_wasm_trunc_f16x8:
+ case WebAssembly::BI__builtin_wasm_nearest_f16x8:
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
case WebAssembly::BI__builtin_wasm_floor_f32x4:
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
@@ -21276,18 +21282,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
unsigned IntNo;
switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_ceil_f16x8:
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
case WebAssembly::BI__builtin_wasm_ceil_f64x2:
IntNo = Intrinsic::ceil;
break;
+ case WebAssembly::BI__builtin_wasm_floor_f16x8:
case WebAssembly::BI__builtin_wasm_floor_f32x4:
case WebAssembly::BI__builtin_wasm_floor_f64x2:
IntNo = Intrinsic::floor;
break;
+ case WebAssembly::BI__builtin_wasm_trunc_f16x8:
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
case WebAssembly::BI__builtin_wasm_trunc_f64x2:
IntNo = Intrinsic::trunc;
break;
+ case WebAssembly::BI__builtin_wasm_nearest_f16x8:
case WebAssembly::BI__builtin_wasm_nearest_f32x4:
case WebAssembly::BI__builtin_wasm_nearest_f64x2:
IntNo = Intrinsic::nearbyint;
@@ -21486,12 +21496,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
return Builder.CreateCall(Callee, {Vec});
}
+ case WebAssembly::BI__builtin_wasm_abs_f16x8:
case WebAssembly::BI__builtin_wasm_abs_f32x4:
case WebAssembly::BI__builtin_wasm_abs_f64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
return Builder.CreateCall(Callee, {Vec});
}
+ case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
Value *Vec = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 2327bec52522d2..8d19609bf2168e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -33,6 +33,7 @@ typedef unsigned long long __u64x2
__attribute__((__vector_size__(16), __aligned__(16)));
typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef __fp16 __f16x8 __attribute__((__vector_size__(16), __aligned__(16)));
typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned char __u8x8
@@ -1878,6 +1879,152 @@ wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) {
(__i8x16)__a, (__i8x16)__b, (__i32x4)__c);
}
+// FP16 intrinsics
+#define __FP16_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("fp16"), \
+ __min_vector_width__(128)))
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) {
+ return (v128_t)__builtin_wasm_splat_f16x8(__a);
+}
+
+static __inline__ float __FP16_FN_ATTRS wasm_f16x8_extract_lane(v128_t __a,
+ int __i)
+ __REQUIRE_CONSTANT(__i) {
+ return __builtin_wasm_extract_lane_f16x8(__a, __i);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a,
+ int __i,
+ float __b)
+ __REQUIRE_CONSTANT(__i) {
+ return (v128_t)__builtin_wasm_replace_lane_f16x8(__a, __i, __b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) {
+ return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_neg(v128_t __a) {
+ return (v128_t)(-(__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sqrt(v128_t __a) {
+ return (v128_t)__builtin_wasm_sqrt_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ceil(v128_t __a) {
+ return (v128_t)__builtin_wasm_ceil_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_floor(v128_t __a) {
+ return (v128_t)__builtin_wasm_floor_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_trunc(v128_t __a) {
+ return (v128_t)__builtin_wasm_trunc_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_nearest(v128_t __a) {
+ return (v128_t)__builtin_wasm_nearest_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_eq(v128_t __a, v128_t __b) {
+ return (v128_t)((__f16x8)__a == (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ne(v128_t __a, v128_t __b) {
+ return (v128_t)((__f16x8)__a != (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_lt(v128_t __a, v128_t __b) {
+ return (v128_t)((__f16x8)__a < (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_gt(v128_t __a, v128_t __b) {
+ return (v128_t)((__f16x8)__a > (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_le(v128_t __a, v128_t __b) {
+ return (v128_t)((__f16x8)__a <= (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ge(v128_t __a, v128_t __b) {
+ return (v128_t)((__f16x8)__a >= (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_add(v128_t __a,
+ v128_t __b) {
+ return (v128_t)((__f16x8)__a + (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sub(v128_t __a,
+ v128_t __b) {
+ return (v128_t)((__f16x8)__a - (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_mul(v128_t __a,
+ v128_t __b) {
+ return (v128_t)((__f16x8)__a * (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_div(v128_t __a,
+ v128_t __b) {
+ return (v128_t)((__f16x8)__a / (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_min(v128_t __a,
+ v128_t __b) {
+ return (v128_t)__builtin_wasm_min_f16x8((__f16x8)__a, (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_max(v128_t __a,
+ v128_t __b) {
+ return (v128_t)__builtin_wasm_max_f16x8((__f16x8)__a, (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmin(v128_t __a,
+ v128_t __b) {
+ return (v128_t)__builtin_wasm_pmin_f16x8((__f16x8)__a, (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmax(v128_t __a,
+ v128_t __b) {
+ return (v128_t)__builtin_wasm_pmax_f16x8((__f16x8)__a, (__f16x8)__b);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_i16x8_trunc_sat_f16x8(v128_t __a) {
+ return (v128_t)__builtin_wasm_trunc_saturate_s_i16x8_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_u16x8_trunc_sat_f16x8(v128_t __a) {
+ return (v128_t)__builtin_wasm_trunc_saturate_u_i16x8_f16x8((__f16x8)__a);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_i16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector((__i16x8)__a, __f16x8);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
+ v128_t __b,
+ v128_t __c) {
+ return (v128_t)__builtin_wasm_relaxed_madd_f16x8((__f16x8)__a, (__f16x8)__b,
+ (__f16x8)__c);
+}
+
+static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_nmadd(v128_t __a,
+ v128_t __b,
+ v128_t __c) {
+ return (v128_t)__builtin_wasm_relaxed_nmadd_f16x8((__f16x8)__a, (__f16x8)__b,
+ (__f16x8)__c);
+}
+
// Deprecated intrinsics
static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle")
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index fb15e0143d3653..b601d90cfcc927 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -2,7 +2,7 @@
// expected-no-diagnostics
// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \
-// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s
+// RUN: -msimd128 -mrelaxed-simd -mfp16 -Wcast-qual -Werror | FileCheck %s
#include <wasm_simd128.h>
@@ -1385,3 +1385,139 @@ v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) {
v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) {
return wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c);
}
+
+// CHECK-LABEL: test_f16x8_splat:
+// CHECK: f16x8.splat{{$}}
+v128_t test_f16x8_splat(float a) { return wasm_f16x8_splat(a); }
+
+// CHECK-LABEL: test_f16x8_extract_lane:
+// CHECK: f16x8.extract_lane 7{{$}}
+int16_t test_f16x8_extract_lane(v128_t a) {
+ return wasm_f16x8_extract_lane(a, 7);
+}
+
+// CHECK-LABEL: test_f16x8_replace_lane:
+// CHECK: f16x8.replace_lane 7{{$}}
+v128_t test_f16x8_replace_lane(v128_t a, float b) {
+ return wasm_f16x8_replace_lane(a, 7, b);
+}
+
+// CHECK-LABEL: test_f16x8_abs:
+// CHECK: f16x8.abs{{$}}
+v128_t test_f16x8_abs(v128_t a) { return wasm_f16x8_abs(a); }
+
+// CHECK-LABEL: test_f16x8_neg:
+// CHECK: f16x8.neg{{$}}
+v128_t test_f16x8_neg(v128_t a) { return wasm_f16x8_neg(a); }
+
+// CHECK-LABEL: test_f16x8_sqrt:
+// CHECK: f16x8.sqrt{{$}}
+v128_t test_f16x8_sqrt(v128_t a) { return wasm_f16x8_sqrt(a); }
+
+// CHECK-LABEL: test_f16x8_ceil:
+// CHECK: f16x8.ceil{{$}}
+v128_t test_f16x8_ceil(v128_t a) { return wasm_f16x8_ceil(a); }
+
+// CHECK-LABEL: test_f16x8_floor:
+// CHECK: f16x8.floor{{$}}
+v128_t test_f16x8_floor(v128_t a) { return wasm_f16x8_floor(a); }
+
+// CHECK-LABEL: test_f16x8_trunc:
+// CHECK: f16x8.trunc{{$}}
+v128_t test_f16x8_trunc(v128_t a) { return wasm_f16x8_trunc(a); }
+
+// CHECK-LABEL: test_f16x8_nearest:
+// CHECK: f16x8.nearest{{$}}
+v128_t test_f16x8_nearest(v128_t a) { return wasm_f16x8_nearest(a); }
+
+// CHECK-LABEL: test_f16x8_add:
+// CHECK: f16x8.add{{$}}
+v128_t test_f16x8_add(v128_t a, v128_t b) { return wasm_f16x8_add(a, b); }
+
+// CHECK-LABEL: test_f16x8_sub:
+// CHECK: f16x8.sub{{$}}
+v128_t test_f16x8_sub(v128_t a, v128_t b) { return wasm_f16x8_sub(a, b); }
+
+// CHECK-LABEL: test_f16x8_mul:
+// CHECK: f16x8.mul{{$}}
+v128_t test_f16x8_mul(v128_t a, v128_t b) { return wasm_f16x8_mul(a, b); }
+
+// CHECK-LABEL: test_f16x8_div:
+// CHECK: f16x8.div{{$}}
+v128_t test_f16x8_div(v128_t a, v128_t b) { return wasm_f16x8_div(a, b); }
+
+// CHECK-LABEL: test_f16x8_min:
+// CHECK: f16x8.min{{$}}
+v128_t test_f16x8_min(v128_t a, v128_t b) { return wasm_f16x8_min(a, b); }
+
+// CHECK-LABEL: test_f16x8_max:
+// CHECK: f16x8.max{{$}}
+v128_t test_f16x8_max(v128_t a, v128_t b) { return wasm_f16x8_max(a, b); }
+
+// CHECK-LABEL: test_f16x8_pmin:
+// CHECK: f16x8.pmin{{$}}
+v128_t test_f16x8_pmin(v128_t a, v128_t b) { return wasm_f16x8_pmin(a, b); }
+
+// CHECK-LABEL: test_f16x8_pmax:
+// CHECK: f16x8.pmax{{$}}
+v128_t test_f16x8_pmax(v128_t a, v128_t b) { return wasm_f16x8_pmax(a, b); }
+
+// CHECK-LABEL: test_f16x8_eq:
+// CHECK: f16x8.eq{{$}}
+v128_t test_f16x8_eq(v128_t a, v128_t b) { return wasm_f16x8_eq(a, b); }
+
+// CHECK-LABEL: test_f16x8_ne:
+// CHECK: f16x8.ne{{$}}
+v128_t test_f16x8_ne(v128_t a, v128_t b) { return wasm_f16x8_ne(a, b); }
+
+// CHECK-LABEL: test_f16x8_lt:
+// CHECK: f16x8.lt{{$}}
+v128_t test_f16x8_lt(v128_t a, v128_t b) { return wasm_f16x8_lt(a, b); }
+
+// CHECK-LABEL: test_f16x8_gt:
+// CHECK: f16x8.gt{{$}}
+v128_t test_f16x8_gt(v128_t a, v128_t b) { return wasm_f16x8_gt(a, b); }
+
+// CHECK-LABEL: test_f16x8_le:
+// CHECK: f16x8.le{{$}}
+v128_t test_f16x8_le(v128_t a, v128_t b) { return wasm_f16x8_le(a, b); }
+
+// CHECK-LABEL: test_f16x8_ge:
+// CHECK: f16x8.ge{{$}}
+v128_t test_f16x8_ge(v128_t a, v128_t b) { return wasm_f16x8_ge(a, b); }
+
+// CHECK-LABEL: test_i16x8_trunc_sat_f16x8:
+// CHECK: i16x8.trunc_sat_f16x8_s{{$}}
+v128_t test_i16x8_trunc_sat_f16x8(v128_t a) {
+ return wasm_i16x8_trunc_sat_f16x8(a);
+}
+
+// CHECK-LABEL: test_u16x8_trunc_sat_f16x8:
+// CHECK: i16x8.trunc_sat_f16x8_u{{$}}
+v128_t test_u16x8_trunc_sat_f16x8(v128_t a) {
+ return wasm_u16x8_trunc_sat_f16x8(a);
+}
+
+// CHECK-LABEL: test_f16x8_convert_i16x8:
+// CHECK: f16x8.convert_i16x8_s{{$}}
+v128_t test_f16x8_convert_i16x8(v128_t a) {
+ return wasm_f16x8_convert_i16x8(a);
+}
+
+// CHECK-LABEL: test_f16x8_convert_u16x8:
+// CHECK: f16x8.convert_i16x8_u{{$}}
+v128_t test_f16x8_convert_u16x8(v128_t a) {
+ return wasm_f16x8_convert_u16x8(a);
+}
+
+// CHECK-LABEL: test_f16x8_relaxed_madd:
+// CHECK: f16x8.relaxed_madd{{$}}
+v128_t test_f16x8_relaxed_madd(v128_t a, v128_t b, v128_t c) {
+ return wasm_f16x8_relaxed_madd(a, b, c);
+}
+
+// CHECK-LABEL: test_f16x8_relaxed_nmadd:
+// CHECK: f16x8.relaxed_nmadd{{$}}
+v128_t test_f16x8_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
+ return wasm_f16x8_relaxed_nmadd(a, b, c);
+}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4578ff7f715146..5cc084f3ab1387 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -275,8 +275,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, T, Expand);
// But saturating fp_to_int converstions are
- for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
+ for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
setOperationAction(Op, MVT::v4i32, Custom);
+ if (Subtarget->hasFP16()) {
+ setOperationAction(Op, MVT::v8i16, Custom);
+ }
+ }
// Support vector extending
for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
@@ -2475,6 +2479,9 @@ SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
if (ResT == MVT::v4i32 && SatVT == MVT::i32)
return Op;
+ if (ResT == MVT::v8i16 && SatVT == MVT::i16)
+ return Op;
+
return SDValue();
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 887278e9c12ef3..da4b8d228f627d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -165,8 +165,9 @@ def F16x8 : Vec {
let prefix = "f16x8";
}
-// TODO: Include F16x8 here when half precision is better supported.
-defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
+// TODO: Remove StdVecs when the F16x8 works every where StdVecs is used.
+defvar StdVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
+defvar AllVecs = !listconcat(StdVecs, [F16x8]);
defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
//===----------------------------------------------------------------------===//
@@ -188,7 +189,7 @@ defm LOAD_V128_A64 :
}
// Def load patterns from WebAssemblyInstrMemory.td for vector types
-foreach vec = AllVecs in {
+foreach vec = StdVecs in {
defm : LoadPat<vec.vt, load, "LOAD_V128">;
}
@@ -217,7 +218,7 @@ defm "" : SIMDLoadSplat<16, 8>;
defm "" : SIMDLoadSplat<32, 9>;
defm "" : SIMDLoadSplat<64, 10>;
-foreach vec = AllVecs in {
+foreach vec = StdVecs in {
defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
defm : LoadPat<vec.vt,
PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>,
@@ -389,7 +390,7 @@ defm STORE_V128_A64 :
}
// Def store patterns from WebAssemblyInstrMemory.td for vector types
-foreach vec = AllVecs in {
+foreach vec = StdVecs in {
defm : StorePat<vec.vt, store, "STORE_V128">;
}
@@ -513,7 +514,7 @@ defm "" : ConstVec<F64x2,
"$i0, $i1">;
// Match splat(x) -> const.v128(x, ..., x)
-foreach vec = AllVecs in {
+for...
[truncated]
|
@@ -165,8 +165,9 @@ def F16x8 : Vec { | |||
let prefix = "f16x8"; | |||
} | |||
|
|||
// TODO: Include F16x8 here when half precision is better supported. | |||
defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; | |||
// TODO: Remove StdVecs when the F16x8 works every where StdVecs is used. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not obvious from this patch, but now AllVecs
is only used in one place for bitcast (which means it now works for f16x8 vectors too).
Alternatively, I can leave AllVecs
alone and just concat F16x8 down where bitcast is supported.
Would it make sense to put these declarations behind |
I could do that, if that's preferred. I followed what the relaxed instructions did and use the target attribute |
Oh, I missed that. In that case, that seems sufficient. Thanks! |
Looks good to me, but I'm not an expert here.. Maybe @tlively can take a look? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks great! Glad you found the cross-project-tests.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/116/builds/2900 Here is the relevant piece of the build log for the reference
|
Getting this to work required a few additional changes: