Skip to content

Commit 8a3277a

Browse files
authored
[WebAssembly] Implement prototype f32.store_f16 instruction. (llvm#91545)
Adds a builtin and an intrinsic for the f32.store_f16 instruction. The instruction stores an f32 value to memory as an f16 value. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md Note: the current spec lists f32.store_f16 as opcode 0xFD0121, but this is incorrect and will be changed to 0xFC31 soon.
1 parent a99cb96 commit 8a3277a

File tree

10 files changed

+75
-2
lines changed

10 files changed

+75
-2
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f"
192192

193193
// Half-Precision (fp16)
194194
TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
195+
TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")
195196

196197
// Reference Types builtins
197198
// Some builtins are custom type-checked - see 't' as part of the third argument,

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21310,6 +21310,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2131021310
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
2131121311
return Builder.CreateCall(Callee, {Addr});
2131221312
}
21313+
case WebAssembly::BI__builtin_wasm_storef16_f32: {
21314+
Value *Val = EmitScalarExpr(E->getArg(0));
21315+
Value *Addr = EmitScalarExpr(E->getArg(1));
21316+
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
21317+
return Builder.CreateCall(Callee, {Val, Addr});
21318+
}
2131321319
case WebAssembly::BI__builtin_wasm_table_get: {
2131421320
assert(E->getArg(0)->getType()->isArrayType());
2131521321
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);

clang/test/CodeGen/builtins-wasm.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,12 @@ float load_f16_f32(__fp16 *addr) {
807807
// WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}})
808808
}
809809

810+
void store_f16_f32(float val, __fp16 *addr) {
811+
return __builtin_wasm_storef16_f32(val, addr);
812+
// WEBASSEMBLY: tail call void @llvm.wasm.storef16.f32(float %val, ptr %{{.*}})
813+
// WEBASSEMBLY-NEXT: ret
814+
}
815+
810816
__externref_t externref_null() {
811817
return __builtin_wasm_ref_null_extern();
812818
// WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern()

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,11 @@ def int_wasm_loadf16_f32:
332332
[llvm_ptr_ty],
333333
[IntrReadMem, IntrArgMemOnly],
334334
"", [SDNPMemOperand]>;
335+
def int_wasm_storef16_f32:
336+
Intrinsic<[],
337+
[llvm_float_ty, llvm_ptr_ty],
338+
[IntrWriteMem, IntrArgMemOnly],
339+
"", [SDNPMemOperand]>;
335340

336341

337342
//===----------------------------------------------------------------------===//

llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
207207
WASM_LOAD_STORE(LOAD_LANE_I16x8)
208208
WASM_LOAD_STORE(STORE_LANE_I16x8)
209209
WASM_LOAD_STORE(LOAD_F16_F32)
210+
WASM_LOAD_STORE(STORE_F16_F32)
210211
return 1;
211212
WASM_LOAD_STORE(LOAD_I32)
212213
WASM_LOAD_STORE(LOAD_F32)

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
914914
Info.align = Align(2);
915915
Info.flags = MachineMemOperand::MOLoad;
916916
return true;
917+
case Intrinsic::wasm_storef16_f32:
918+
Info.opc = ISD::INTRINSIC_VOID;
919+
Info.memVT = MVT::f16;
920+
Info.ptrVal = I.getArgOperand(1);
921+
Info.offset = 0;
922+
Info.align = Align(2);
923+
Info.flags = MachineMemOperand::MOStore;
924+
return true;
917925
default:
918926
return false;
919927
}

llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,9 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>;
7272
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
7373
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;
7474

75-
// Half Precision
76-
defm LOAD_F16_F32 : WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;
75+
// Half-precision load.
76+
defm LOAD_F16_F32 :
77+
WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;
7778

7879
// Pattern matching
7980

@@ -171,12 +172,18 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
171172
defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
172173
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
173174

175+
// Half-precision store.
176+
defm STORE_F16_F32 :
177+
WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>;
178+
174179
defm : StorePat<i32, truncstorei8, "STORE8_I32">;
175180
defm : StorePat<i32, truncstorei16, "STORE16_I32">;
176181
defm : StorePat<i64, truncstorei8, "STORE8_I64">;
177182
defm : StorePat<i64, truncstorei16, "STORE16_I64">;
178183
defm : StorePat<i64, truncstorei32, "STORE32_I64">;
179184

185+
defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">;
186+
180187
multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
181188
// Current memory size.
182189
defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags),

llvm/test/CodeGen/WebAssembly/half-precision.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s
33

44
; The declared name must match the @llvm.wasm.loadf16.f32 calls in this file.
declare float @llvm.wasm.loadf16.f32(ptr)
5+
declare void @llvm.wasm.storef16.f32(float, ptr)
56

67
; CHECK-LABEL: ldf16_32:
78
; CHECK: f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}}
@@ -10,3 +11,11 @@ define float @ldf16_32(ptr %p) {
1011
%v = call float @llvm.wasm.loadf16.f32(ptr %p)
1112
ret float %v
1213
}
14+
15+
; CHECK-LABEL: stf16_32:
16+
; CHECK: f32.store_f16 0($1), $0
17+
; CHECK-NEXT: return
18+
define void @stf16_32(float %v, ptr %p) {
19+
tail call void @llvm.wasm.storef16.f32(float %v, ptr %p)
20+
ret void
21+
}

llvm/test/CodeGen/WebAssembly/offset.ll

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,3 +692,30 @@ define float @load_f16_f32_with_folded_gep_offset(ptr %p) {
692692
%t = call float @llvm.wasm.loadf16.f32(ptr %s)
693693
ret float %t
694694
}
695+
696+
;===----------------------------------------------------------------------------
697+
; Stores: Half Precision
698+
;===----------------------------------------------------------------------------
699+
700+
; Basic store.
701+
702+
; CHECK-LABEL: store_f16_f32_no_offset:
703+
; CHECK-NEXT: .functype store_f16_f32_no_offset (i32, f32) -> (){{$}}
704+
; CHECK-NEXT: f32.store_f16 0($0), $1{{$}}
705+
; CHECK-NEXT: return{{$}}
706+
define void @store_f16_f32_no_offset(ptr %p, float %v) {
707+
call void @llvm.wasm.storef16.f32(float %v, ptr %p)
708+
ret void
709+
}
710+
711+
; Storing to a fixed address.
712+
713+
; CHECK-LABEL: store_f16_f32_to_numeric_address:
714+
; CHECK: i32.const $push1=, 0{{$}}
715+
; CHECK-NEXT: f32.const $push0=, 0x0p0{{$}}
716+
; CHECK-NEXT: f32.store_f16 42($pop1), $pop0{{$}}
717+
define void @store_f16_f32_to_numeric_address() {
718+
%s = inttoptr i32 42 to ptr
719+
call void @llvm.wasm.storef16.f32(float 0.0, ptr %s)
720+
ret void
721+
}

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,4 +842,7 @@ main:
842842
# CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30]
843843
f32.load_f16 48
844844

845+
# CHECK: f32.store_f16 32 # encoding: [0xfc,0x31,0x01,0x20]
846+
f32.store_f16 32
847+
845848
end_function

0 commit comments

Comments
 (0)