Skip to content

Commit 0dbd72d

Browse files
authored
[WebAssembly] Implement f16x8.replace_lane instruction. (#99388)
Use a builtin and intrinsic until half types are better supported for instruction selection.
1 parent 3db78fa commit 0dbd72d

File tree

7 files changed

+42
-0
lines changed

7 files changed

+42
-0
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
201201
TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")
202202
TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "half-precision")
203203
TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "half-precision")
204+
TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "half-precision")
204205

205206
// Reference Types builtins
206207
// Some builtins are custom type-checked - see 't' as part of the third argument,

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21567,6 +21567,13 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2156721567
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
2156821568
return Builder.CreateCall(Callee, {Vector, Index});
2156921569
}
21570+
case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
21571+
Value *Vector = EmitScalarExpr(E->getArg(0));
21572+
Value *Index = EmitScalarExpr(E->getArg(1));
21573+
Value *Val = EmitScalarExpr(E->getArg(2));
21574+
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
21575+
return Builder.CreateCall(Callee, {Vector, Index, Val});
21576+
}
2157021577
case WebAssembly::BI__builtin_wasm_table_get: {
2157121578
assert(E->getArg(0)->getType()->isArrayType());
2157221579
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);

clang/test/CodeGen/builtins-wasm.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,12 @@ float extract_lane_f16x8(f16x8 a, int i) {
840840
return __builtin_wasm_extract_lane_f16x8(a, i);
841841
}
842842

843+
f16x8 replace_lane_f16x8(f16x8 a, int i, float v) {
844+
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v)
845+
// WEBASSEMBLY-NEXT: ret <8 x half> %0
846+
return __builtin_wasm_replace_lane_f16x8(a, i, v);
847+
}
848+
843849
f16x8 min_f16x8(f16x8 a, f16x8 b) {
844850
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
845851
// WEBASSEMBLY-NEXT: ret <8 x half> %0

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,10 @@ def int_wasm_extract_lane_f16x8:
363363
DefaultAttrsIntrinsic<[llvm_float_ty],
364364
[llvm_v8f16_ty, llvm_i32_ty],
365365
[IntrNoMem, IntrSpeculatable]>;
366+
def int_wasm_replace_lane_f16x8:
367+
DefaultAttrsIntrinsic<[llvm_v8f16_ty],
368+
[llvm_v8f16_ty, llvm_i32_ty, llvm_float_ty],
369+
[IntrNoMem, IntrSpeculatable]>;
366370

367371

368372
//===----------------------------------------------------------------------===//

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,19 @@ defm "" : ReplaceLane<I64x2, 30>;
702702
defm "" : ReplaceLane<F32x4, 32>;
703703
defm "" : ReplaceLane<F64x2, 34>;
704704

705+
// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above
706+
// since LLVM IR generated with half type arguments is not well supported and
707+
// creates conversions from f16->f32.
708+
defm REPLACE_LANE_F16x8 :
709+
HALF_PRECISION_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, F32:$x),
710+
(outs), (ins vec_i8imm_op:$idx),
711+
[(set (v8f16 V128:$dst), (int_wasm_replace_lane_f16x8
712+
(v8f16 V128:$vec),
713+
(i32 LaneIdx8:$idx),
714+
(f32 F32:$x)))],
715+
"f16x8.replace_lane\t$dst, $vec, $idx, $x",
716+
"f16x8.replace_lane\t$idx", 0x122>;
717+
705718
// Lower undef lane indices to zero
706719
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
707720
(REPLACE_LANE_I8x16 $vec, 0, $x)>;

llvm/test/CodeGen/WebAssembly/half-precision.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ define float @extract_lane_v8f16(<8 x half> %v) {
3636
ret float %r
3737
}
3838

39+
; CHECK-LABEL: replace_lane_v8f16:
40+
; CHECK: f16x8.replace_lane $push0=, $0, 1, $1
41+
; CHECK-NEXT: return $pop0
42+
define <8 x half> @replace_lane_v8f16(<8 x half> %v, float %f) {
43+
%r = call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %v, i32 1, float %f)
44+
ret <8 x half> %r
45+
}
46+
3947
; CHECK-LABEL: add_v8f16:
4048
; CHECK: f16x8.add $push0=, $0, $1
4149
; CHECK-NEXT: return $pop0

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -851,6 +851,9 @@ main:
851851
# CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01]
852852
f16x8.extract_lane 1
853853

854+
# CHECK: f16x8.replace_lane 1 # encoding: [0xfd,0xa2,0x02,0x01]
855+
f16x8.replace_lane 1
856+
854857
# CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02]
855858
f16x8.add
856859

0 commit comments

Comments
 (0)