Skip to content

Commit dfd1a2f

Browse files
authored
[WebAssembly] Implement all f16x8 unary instructions. (#94063)
All of these instructions can be generated using regular LLVM IR intrinsics. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md
1 parent 0cb66a7 commit dfd1a2f

File tree

3 files changed

+128
-2
lines changed

3 files changed

+128
-2
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -828,12 +828,18 @@ multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop,
828828
(!cast<NI>(NAME) $lhs, $rhs)>;
829829
}
830830

831-
multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name, bits<32> simdop> {
831+
multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name,
832+
bits<32> simdop, list<Predicate> reqs = []> {
832833
defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins),
833834
[(set (vec.vt V128:$dst),
834835
(vec.vt (node (vec.vt V128:$v))))],
835836
vec.prefix#"."#name#"\t$dst, $v",
836-
vec.prefix#"."#name, simdop>;
837+
vec.prefix#"."#name, simdop, reqs>;
838+
}
839+
840+
multiclass HalfPrecisionUnary<Vec vec, SDPatternOperator node, string name,
841+
bits<32> simdop> {
842+
defm "" : SIMDUnary<vec, node, name, simdop, [HasHalfPrecision]>;
837843
}
838844

839845
// Bitwise logic: v128.not
@@ -1190,6 +1196,10 @@ defm EXTMUL_HIGH_U :
11901196
multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
11911197
defm "" : SIMDUnary<F32x4, node, name, baseInst>;
11921198
defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>;
1199+
// Unlike F32x4 and F64x2, F16x8 has no gap in the opcodes between "neg" and
1200+
// "sqrt", so the offset used for "sqrt" is one less (80 rather than 81).
1201+
defm "" : HalfPrecisionUnary<F16x8, node, name,
1202+
!add(baseInst,!if(!eq(name, "sqrt"), 80, 81))>;
11931203
}
11941204

11951205
// Absolute value: abs
@@ -1210,14 +1220,20 @@ defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>;
12101220
defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
12111221
defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
12121222
defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;
1223+
defm CEIL : HalfPrecisionUnary<F16x8, fceil, "ceil", 0x13c>;
1224+
defm FLOOR : HalfPrecisionUnary<F16x8, ffloor, "floor", 0x13d>;
1225+
defm TRUNC : HalfPrecisionUnary<F16x8, ftrunc, "trunc", 0x13e>;
1226+
defm NEAREST : HalfPrecisionUnary<F16x8, fnearbyint, "nearest", 0x13f>;
12131227

12141228
// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
12151229
def : Pat<(v4f32 (frint (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>;
12161230
def : Pat<(v2f64 (frint (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
1231+
def : Pat<(v8f16 (frint (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>;
12171232

12181233
// WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint.
12191234
def : Pat<(v4f32 (froundeven (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>;
12201235
def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
1236+
def : Pat<(v8f16 (froundeven (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>;
12211237

12221238
//===----------------------------------------------------------------------===//
12231239
// Floating-point binary arithmetic

llvm/test/CodeGen/WebAssembly/half-precision.ll

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,92 @@ define <8 x i1> @compare_oge_v8f16 (<8 x half> %x, <8 x half> %y) {
157157
%res = fcmp oge <8 x half> %x, %y
158158
ret <8 x i1> %res
159159
}
160+
161+
; CHECK-LABEL: abs_v8f16:
162+
; CHECK-NEXT: .functype abs_v8f16 (v128) -> (v128)
163+
; CHECK-NEXT: f16x8.abs $push0=, $0
164+
; CHECK-NEXT: return $pop0
165+
declare <8 x half> @llvm.fabs.v8f16(<8 x half>) nounwind readnone
166+
define <8 x half> @abs_v8f16(<8 x half> %x) {
167+
%a = call <8 x half> @llvm.fabs.v8f16(<8 x half> %x)
168+
ret <8 x half> %a
169+
}
170+
171+
; CHECK-LABEL: neg_v8f16:
172+
; CHECK-NEXT: .functype neg_v8f16 (v128) -> (v128)
173+
; CHECK-NEXT: f16x8.neg $push0=, $0
174+
; CHECK-NEXT: return $pop0
175+
define <8 x half> @neg_v8f16(<8 x half> %x) {
176+
%a = fsub nsz <8 x half> <half 0., half 0., half 0., half 0., half 0., half 0., half 0., half 0.>, %x
177+
ret <8 x half> %a
178+
}
179+
180+
; CHECK-LABEL: sqrt_v8f16:
181+
; CHECK-NEXT: .functype sqrt_v8f16 (v128) -> (v128)
182+
; CHECK-NEXT: f16x8.sqrt $push0=, $0
183+
; CHECK-NEXT: return $pop0
184+
declare <8 x half> @llvm.sqrt.v8f16(<8 x half> %x)
185+
define <8 x half> @sqrt_v8f16(<8 x half> %x) {
186+
%a = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %x)
187+
ret <8 x half> %a
188+
}
189+
190+
; CHECK-LABEL: ceil_v8f16:
191+
; CHECK-NEXT: .functype ceil_v8f16 (v128) -> (v128){{$}}
192+
; CHECK-NEXT: f16x8.ceil $push[[R:[0-9]+]]=, $0{{$}}
193+
; CHECK-NEXT: return $pop[[R]]{{$}}
194+
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
195+
define <8 x half> @ceil_v8f16(<8 x half> %a) {
196+
%v = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
197+
ret <8 x half> %v
198+
}
199+
200+
; CHECK-LABEL: floor_v8f16:
201+
; CHECK-NEXT: .functype floor_v8f16 (v128) -> (v128){{$}}
202+
; CHECK-NEXT: f16x8.floor $push[[R:[0-9]+]]=, $0{{$}}
203+
; CHECK-NEXT: return $pop[[R]]{{$}}
204+
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
205+
define <8 x half> @floor_v8f16(<8 x half> %a) {
206+
%v = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
207+
ret <8 x half> %v
208+
}
209+
210+
; CHECK-LABEL: trunc_v8f16:
211+
; CHECK-NEXT: .functype trunc_v8f16 (v128) -> (v128){{$}}
212+
; CHECK-NEXT: f16x8.trunc $push[[R:[0-9]+]]=, $0{{$}}
213+
; CHECK-NEXT: return $pop[[R]]{{$}}
214+
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
215+
define <8 x half> @trunc_v8f16(<8 x half> %a) {
216+
%v = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
217+
ret <8 x half> %v
218+
}
219+
220+
; CHECK-LABEL: nearest_v8f16:
221+
; CHECK-NEXT: .functype nearest_v8f16 (v128) -> (v128){{$}}
222+
; CHECK-NEXT: f16x8.nearest $push[[R:[0-9]+]]=, $0{{$}}
223+
; CHECK-NEXT: return $pop[[R]]{{$}}
224+
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
225+
define <8 x half> @nearest_v8f16(<8 x half> %a) {
226+
%v = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
227+
ret <8 x half> %v
228+
}
229+
230+
; CHECK-LABEL: nearest_v8f16_via_rint:
231+
; CHECK-NEXT: .functype nearest_v8f16_via_rint (v128) -> (v128){{$}}
232+
; CHECK-NEXT: f16x8.nearest $push[[R:[0-9]+]]=, $0{{$}}
233+
; CHECK-NEXT: return $pop[[R]]{{$}}
234+
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
235+
define <8 x half> @nearest_v8f16_via_rint(<8 x half> %a) {
236+
%v = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
237+
ret <8 x half> %v
238+
}
239+
240+
; CHECK-LABEL: nearest_v8f16_via_roundeven:
241+
; CHECK-NEXT: .functype nearest_v8f16_via_roundeven (v128) -> (v128){{$}}
242+
; CHECK-NEXT: f16x8.nearest $push[[R:[0-9]+]]=, $0{{$}}
243+
; CHECK-NEXT: return $pop[[R]]{{$}}
244+
declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
245+
define <8 x half> @nearest_v8f16_via_roundeven(<8 x half> %a) {
246+
%v = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %a)
247+
ret <8 x half> %v
248+
}

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,4 +893,25 @@ main:
893893
# CHECK: f16x8.ge # encoding: [0xfd,0xc5,0x02]
894894
f16x8.ge
895895

896+
# CHECK: f16x8.abs # encoding: [0xfd,0xb1,0x02]
897+
f16x8.abs
898+
899+
# CHECK: f16x8.neg # encoding: [0xfd,0xb2,0x02]
900+
f16x8.neg
901+
902+
# CHECK: f16x8.sqrt # encoding: [0xfd,0xb3,0x02]
903+
f16x8.sqrt
904+
905+
# CHECK: f16x8.ceil # encoding: [0xfd,0xbc,0x02]
906+
f16x8.ceil
907+
908+
# CHECK: f16x8.floor # encoding: [0xfd,0xbd,0x02]
909+
f16x8.floor
910+
911+
# CHECK: f16x8.trunc # encoding: [0xfd,0xbe,0x02]
912+
f16x8.trunc
913+
914+
# CHECK: f16x8.nearest # encoding: [0xfd,0xbf,0x02]
915+
f16x8.nearest
916+
896917
end_function

0 commit comments

Comments
 (0)