-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[WebAssembly] Implement f16x8.replace_lane instruction. #99388
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WebAssembly] Implement f16x8.replace_lane instruction. #99388
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-mc Author: Brendan Dahl (brendandahl) Changes: Use a builtin and intrinsic until half types are better supported for instruction selection. Full diff: https://github.com/llvm/llvm-project/pull/99388.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 2a45f8a6582a2..df304a71e475e 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -201,6 +201,7 @@ TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")
TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "half-precision")
TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "half-precision")
+TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "half-precision")
// Reference Types builtins
// Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67027f8aa93f3..402b7a7b20e61 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -21386,6 +21386,13 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
return Builder.CreateCall(Callee, {Vector, Index});
}
+ case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
+ Value *Vector = EmitScalarExpr(E->getArg(0));
+ Value *Index = EmitScalarExpr(E->getArg(1));
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
+ return Builder.CreateCall(Callee, {Vector, Index, Val});
+ }
case WebAssembly::BI__builtin_wasm_table_get: {
assert(E->getArg(0)->getType()->isArrayType());
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 75861b1b4bd6d..f494aeada0157 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -840,6 +840,12 @@ float extract_lane_f16x8(f16x8 a, int i) {
return __builtin_wasm_extract_lane_f16x8(a, i);
}
+f16x8 replace_lane_f16x8(f16x8 a, int i, float v) {
+ // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v)
+ // WEBASSEMBLY-NEXT: ret <8 x half> %0
+ return __builtin_wasm_replace_lane_f16x8(a, i, v);
+}
+
f16x8 min_f16x8(f16x8 a, f16x8 b) {
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
// WEBASSEMBLY-NEXT: ret <8 x half> %0
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 47aab196a6d4f..4d2df1c44ebce 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -363,6 +363,10 @@ def int_wasm_extract_lane_f16x8:
DefaultAttrsIntrinsic<[llvm_float_ty],
[llvm_v8f16_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_replace_lane_f16x8:
+ DefaultAttrsIntrinsic<[llvm_v8f16_ty],
+ [llvm_v8f16_ty, llvm_i32_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 2ee430c88169d..f11fe12c6ecb8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -702,6 +702,19 @@ defm "" : ReplaceLane<I64x2, 30>;
defm "" : ReplaceLane<F32x4, 32>;
defm "" : ReplaceLane<F64x2, 34>;
+// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above
+// since LL generated with half type arguments is not well supported and creates
+// conversions from f16->f32.
+defm REPLACE_LANE_F16x8 :
+ HALF_PRECISION_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, F32:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set (v8f16 V128:$dst), (int_wasm_replace_lane_f16x8
+ (v8f16 V128:$vec),
+ (i32 LaneIdx8:$idx),
+ (f32 F32:$x)))],
+ "f16x8.replace_lane\t$dst, $vec, $idx, $x",
+ "f16x8.replace_lane\t$idx", 0x122>;
+
// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
(REPLACE_LANE_I8x16 $vec, 0, $x)>;
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index fa78f5f9591d6..dba4138ad59cc 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -36,6 +36,14 @@ define float @extract_lane_v8f16(<8 x half> %v) {
ret float %r
}
+; CHECK-LABEL: replace_lane_v8f16:
+; CHECK: f16x8.replace_lane $push0=, $0, 1, $1
+; CHECK-NEXT: return $pop0
+define <8 x half> @replace_lane_v8f16(<8 x half> %v, float %f) {
+ %r = call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %v, i32 1, float %f)
+ ret <8 x half> %r
+}
+
; CHECK-LABEL: add_v8f16:
; CHECK: f16x8.add $push0=, $0, $1
; CHECK-NEXT: return $pop0
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 8c3483bfaad7a..7ae4d47d888cf 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -851,6 +851,9 @@ main:
# CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01]
f16x8.extract_lane 1
+ # CHECK: f16x8.replace_lane 1 # encoding: [0xfd,0xa2,0x02,0x01]
+ f16x8.replace_lane 1
+
# CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02]
f16x8.add
|
@llvm/pr-subscribers-clang Author: Brendan Dahl (brendandahl) Changes: Use a builtin and intrinsic until half types are better supported for instruction selection. Full diff: https://github.com/llvm/llvm-project/pull/99388.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 2a45f8a6582a2..df304a71e475e 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -201,6 +201,7 @@ TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")
TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "half-precision")
TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "half-precision")
+TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "half-precision")
// Reference Types builtins
// Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67027f8aa93f3..402b7a7b20e61 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -21386,6 +21386,13 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
return Builder.CreateCall(Callee, {Vector, Index});
}
+ case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
+ Value *Vector = EmitScalarExpr(E->getArg(0));
+ Value *Index = EmitScalarExpr(E->getArg(1));
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
+ return Builder.CreateCall(Callee, {Vector, Index, Val});
+ }
case WebAssembly::BI__builtin_wasm_table_get: {
assert(E->getArg(0)->getType()->isArrayType());
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 75861b1b4bd6d..f494aeada0157 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -840,6 +840,12 @@ float extract_lane_f16x8(f16x8 a, int i) {
return __builtin_wasm_extract_lane_f16x8(a, i);
}
+f16x8 replace_lane_f16x8(f16x8 a, int i, float v) {
+ // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v)
+ // WEBASSEMBLY-NEXT: ret <8 x half> %0
+ return __builtin_wasm_replace_lane_f16x8(a, i, v);
+}
+
f16x8 min_f16x8(f16x8 a, f16x8 b) {
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
// WEBASSEMBLY-NEXT: ret <8 x half> %0
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 47aab196a6d4f..4d2df1c44ebce 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -363,6 +363,10 @@ def int_wasm_extract_lane_f16x8:
DefaultAttrsIntrinsic<[llvm_float_ty],
[llvm_v8f16_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_replace_lane_f16x8:
+ DefaultAttrsIntrinsic<[llvm_v8f16_ty],
+ [llvm_v8f16_ty, llvm_i32_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 2ee430c88169d..f11fe12c6ecb8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -702,6 +702,19 @@ defm "" : ReplaceLane<I64x2, 30>;
defm "" : ReplaceLane<F32x4, 32>;
defm "" : ReplaceLane<F64x2, 34>;
+// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above
+// since LL generated with half type arguments is not well supported and creates
+// conversions from f16->f32.
+defm REPLACE_LANE_F16x8 :
+ HALF_PRECISION_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, F32:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set (v8f16 V128:$dst), (int_wasm_replace_lane_f16x8
+ (v8f16 V128:$vec),
+ (i32 LaneIdx8:$idx),
+ (f32 F32:$x)))],
+ "f16x8.replace_lane\t$dst, $vec, $idx, $x",
+ "f16x8.replace_lane\t$idx", 0x122>;
+
// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
(REPLACE_LANE_I8x16 $vec, 0, $x)>;
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index fa78f5f9591d6..dba4138ad59cc 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -36,6 +36,14 @@ define float @extract_lane_v8f16(<8 x half> %v) {
ret float %r
}
+; CHECK-LABEL: replace_lane_v8f16:
+; CHECK: f16x8.replace_lane $push0=, $0, 1, $1
+; CHECK-NEXT: return $pop0
+define <8 x half> @replace_lane_v8f16(<8 x half> %v, float %f) {
+ %r = call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %v, i32 1, float %f)
+ ret <8 x half> %r
+}
+
; CHECK-LABEL: add_v8f16:
; CHECK: f16x8.add $push0=, $0, $1
; CHECK-NEXT: return $pop0
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 8c3483bfaad7a..7ae4d47d888cf 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -851,6 +851,9 @@ main:
# CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01]
f16x8.extract_lane 1
+ # CHECK: f16x8.replace_lane 1 # encoding: [0xfd,0xa2,0x02,0x01]
+ f16x8.replace_lane 1
+
# CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02]
f16x8.add
|
@@ -702,6 +702,19 @@ defm "" : ReplaceLane<I64x2, 30>; | |||
defm "" : ReplaceLane<F32x4, 32>; | |||
defm "" : ReplaceLane<F64x2, 34>; | |||
|
|||
// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above | |||
// since LL generated with half type arguments is not well supported and creates |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is LL? I may lack the context because I haven't reviewed your other recent PRs..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this just refers to LLVM IR (but I agree that using that term is more consistent with what I've seen elsewhere).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I'll update.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
otherwise LGTM
Use a builtin and intrinsic until half types are better supported for instruction selection.
a6d65f2
to
8320b1f
Compare
Summary: Use a builtin and intrinsic until half types are better supported for instruction selection. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250626
Use a builtin and intrinsic until half types are better supported for instruction selection.