Skip to content

Commit 3d49d1c

Browse files
committed
[WebAssembly] Implement pseudo-min/max SIMD instructions
Summary: Implements the pseudo-min/max SIMD instructions as proposed in WebAssembly/simd#122. Since these instructions are not yet merged into the SIMD spec proposal, this patch makes them entirely opt-in by surfacing them only through LLVM intrinsics and clang builtins. If these instructions are made official, these intrinsics and builtins should be replaced with simple instruction patterns. Reviewers: aheejin Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D79742
1 parent 25a95f4 commit 3d49d1c

File tree

8 files changed

+137
-0
lines changed

8 files changed

+137
-0
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,12 @@ TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
139139

140140
TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
141141
TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
142+
TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128")
143+
TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128")
142144
TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
143145
TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
146+
TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
147+
TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
144148

145149
TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
146150

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15758,6 +15758,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
1575815758
ConvertType(E->getType()));
1575915759
return Builder.CreateCall(Callee, {LHS, RHS});
1576015760
}
15761+
case WebAssembly::BI__builtin_wasm_pmin_f32x4:
15762+
case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
15763+
Value *LHS = EmitScalarExpr(E->getArg(0));
15764+
Value *RHS = EmitScalarExpr(E->getArg(1));
15765+
Function *Callee =
15766+
CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
15767+
return Builder.CreateCall(Callee, {LHS, RHS});
15768+
}
15769+
case WebAssembly::BI__builtin_wasm_pmax_f32x4:
15770+
case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
15771+
Value *LHS = EmitScalarExpr(E->getArg(0));
15772+
Value *RHS = EmitScalarExpr(E->getArg(1));
15773+
Function *Callee =
15774+
CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
15775+
return Builder.CreateCall(Callee, {LHS, RHS});
15776+
}
1576115777
case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
1576215778
Value *Src = EmitScalarExpr(E->getArg(0));
1576315779
Value *Indices = EmitScalarExpr(E->getArg(1));

clang/lib/Headers/wasm_simd128.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,16 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a,
937937
return (v128_t)__builtin_wasm_max_f32x4((__f32x4)__a, (__f32x4)__b);
938938
}
939939

940+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a,
941+
v128_t __b) {
942+
return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b);
943+
}
944+
945+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a,
946+
v128_t __b) {
947+
return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b);
948+
}
949+
940950
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) {
941951
return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)__a);
942952
}
@@ -997,6 +1007,16 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a,
9971007
return (v128_t)__builtin_wasm_max_f64x2((__f64x2)__a, (__f64x2)__b);
9981008
}
9991009

1010+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a,
1011+
v128_t __b) {
1012+
return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b);
1013+
}
1014+
1015+
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a,
1016+
v128_t __b) {
1017+
return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b);
1018+
}
1019+
10001020
static __inline__ v128_t __DEFAULT_FN_ATTRS
10011021
wasm_i32x4_trunc_saturate_f32x4(v128_t __a) {
10021022
return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)__a);

clang/test/CodeGen/builtins-wasm.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,20 @@ f32x4 max_f32x4(f32x4 x, f32x4 y) {
579579
// WEBASSEMBLY-NEXT: ret
580580
}
581581

582+
f32x4 pmin_f32x4(f32x4 x, f32x4 y) {
583+
return __builtin_wasm_pmin_f32x4(x, y);
584+
// WEBASSEMBLY: call <4 x float> @llvm.wasm.pmin.v4f32(
585+
// WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
586+
// WEBASSEMBLY-NEXT: ret
587+
}
588+
589+
f32x4 pmax_f32x4(f32x4 x, f32x4 y) {
590+
return __builtin_wasm_pmax_f32x4(x, y);
591+
// WEBASSEMBLY: call <4 x float> @llvm.wasm.pmax.v4f32(
592+
// WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
593+
// WEBASSEMBLY-NEXT: ret
594+
}
595+
582596
f64x2 min_f64x2(f64x2 x, f64x2 y) {
583597
return __builtin_wasm_min_f64x2(x, y);
584598
// WEBASSEMBLY: call <2 x double> @llvm.minimum.v2f64(
@@ -593,6 +607,20 @@ f64x2 max_f64x2(f64x2 x, f64x2 y) {
593607
// WEBASSEMBLY-NEXT: ret
594608
}
595609

610+
f64x2 pmin_f64x2(f64x2 x, f64x2 y) {
611+
return __builtin_wasm_pmin_f64x2(x, y);
612+
// WEBASSEMBLY: call <2 x double> @llvm.wasm.pmin.v2f64(
613+
// WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
614+
// WEBASSEMBLY-NEXT: ret
615+
}
616+
617+
f64x2 pmax_f64x2(f64x2 x, f64x2 y) {
618+
return __builtin_wasm_pmax_f64x2(x, y);
619+
// WEBASSEMBLY: call <2 x double> @llvm.wasm.pmax.v2f64(
620+
// WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
621+
// WEBASSEMBLY-NEXT: ret
622+
}
623+
596624
f32x4 sqrt_f32x4(f32x4 x) {
597625
return __builtin_wasm_sqrt_f32x4(x);
598626
// WEBASSEMBLY: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,17 @@ def int_wasm_widen_high_unsigned :
176176
[llvm_anyvector_ty],
177177
[IntrNoMem, IntrSpeculatable]>;
178178

179+
// TODO: Replace these intrinsics with normal ISel patterns once the
180+
// pmin/pmax instructions are merged to the spec proposal.
181+
def int_wasm_pmin :
182+
Intrinsic<[llvm_anyvector_ty],
183+
[LLVMMatchType<0>, LLVMMatchType<0>],
184+
[IntrNoMem, IntrSpeculatable]>;
185+
def int_wasm_pmax :
186+
Intrinsic<[llvm_anyvector_ty],
187+
[LLVMMatchType<0>, LLVMMatchType<0>],
188+
[IntrNoMem, IntrSpeculatable]>;
189+
179190
//===----------------------------------------------------------------------===//
180191
// Bulk memory intrinsics
181192
//===----------------------------------------------------------------------===//

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,12 @@ defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
794794
// NaN-propagating maximum: max
795795
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
796796

797+
// Pseudo-minimum: pmin (lane-wise `b < a ? b : a`)
798+
defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;
799+
800+
// Pseudo-maximum: pmax (lane-wise `a < b ? b : a`)
801+
defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;
802+
797803
//===----------------------------------------------------------------------===//
798804
// Conversions
799805
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,26 @@ define <4 x float> @bitselect_v4f32(<4 x float> %v1, <4 x float> %v2, <4 x float
501501
ret <4 x float> %a
502502
}
503503

504+
; CHECK-LABEL: pmin_v4f32:
505+
; SIMD128-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}}
506+
; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
507+
; SIMD128-NEXT: return $pop[[R]]{{$}}
508+
declare <4 x float> @llvm.wasm.pmin.v4f32(<4 x float>, <4 x float>)
509+
define <4 x float> @pmin_v4f32(<4 x float> %a, <4 x float> %b) {
510+
%v = call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> %a, <4 x float> %b)
511+
ret <4 x float> %v
512+
}
513+
514+
; CHECK-LABEL: pmax_v4f32:
515+
; SIMD128-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}}
516+
; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
517+
; SIMD128-NEXT: return $pop[[R]]{{$}}
518+
declare <4 x float> @llvm.wasm.pmax.v4f32(<4 x float>, <4 x float>)
519+
define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
520+
%v = call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> %a, <4 x float> %b)
521+
ret <4 x float> %v
522+
}
523+
504524
; CHECK-LABEL: qfma_v4f32:
505525
; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}}
506526
; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
@@ -540,6 +560,26 @@ define <2 x double> @bitselect_v2f64(<2 x double> %v1, <2 x double> %v2, <2 x do
540560
ret <2 x double> %a
541561
}
542562

563+
; CHECK-LABEL: pmin_v2f64:
564+
; SIMD128-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}}
565+
; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
566+
; SIMD128-NEXT: return $pop[[R]]{{$}}
567+
declare <2 x double> @llvm.wasm.pmin.v2f64(<2 x double>, <2 x double>)
568+
define <2 x double> @pmin_v2f64(<2 x double> %a, <2 x double> %b) {
569+
%v = call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> %a, <2 x double> %b)
570+
ret <2 x double> %v
571+
}
572+
573+
; CHECK-LABEL: pmax_v2f64:
574+
; SIMD128-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}}
575+
; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
576+
; SIMD128-NEXT: return $pop[[R]]{{$}}
577+
declare <2 x double> @llvm.wasm.pmax.v2f64(<2 x double>, <2 x double>)
578+
define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) {
579+
%v = call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> %a, <2 x double> %b)
580+
ret <2 x double> %v
581+
}
582+
543583
; CHECK-LABEL: qfma_v2f64:
544584
; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}}
545585
; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,12 @@ main:
535535
# CHECK: f32x4.max # encoding: [0xfd,0xe9,0x01]
536536
f32x4.max
537537

538+
# CHECK: f32x4.pmin # encoding: [0xfd,0xea,0x01]
539+
f32x4.pmin
540+
541+
# CHECK: f32x4.pmax # encoding: [0xfd,0xeb,0x01]
542+
f32x4.pmax
543+
538544
# CHECK: f64x2.abs # encoding: [0xfd,0xec,0x01]
539545
f64x2.abs
540546

@@ -562,6 +568,12 @@ main:
562568
# CHECK: f64x2.max # encoding: [0xfd,0xf5,0x01]
563569
f64x2.max
564570

571+
# CHECK: f64x2.pmin # encoding: [0xfd,0xf6,0x01]
572+
f64x2.pmin
573+
574+
# CHECK: f64x2.pmax # encoding: [0xfd,0xf7,0x01]
575+
f64x2.pmax
576+
565577
# CHECK: i32x4.trunc_sat_f32x4_s # encoding: [0xfd,0xf8,0x01]
566578
i32x4.trunc_sat_f32x4_s
567579

0 commit comments

Comments
 (0)