Skip to content

Commit e522010

Browse files
committed
[WebAssembly] Custom combines for f64x2.promote_low_f32x4
Replace the clang builtin function and LLVM intrinsic previously used to select the f64x2.promote_low_f32x4 instruction with custom combines from standard SelectionDAG nodes. Implement the new combines to share code with the similar combines for f64x2.convert_low_i32x4_{s,u}.

Resolves PR50232.

Differential Revision: https://reviews.llvm.org/D105675
1 parent 04c203e commit e522010

File tree

11 files changed

+198
-143
lines changed

11 files changed

+198
-143
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,6 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
192192
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
193193
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
194194
TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")
195-
TARGET_BUILTIN(__builtin_wasm_promote_low_f32x4_f64x2, "V2dV4f", "nc", "simd128")
196195

197196
TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
198197
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17728,11 +17728,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
1772817728
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_demote_zero);
1772917729
return Builder.CreateCall(Callee, Vec);
1773017730
}
17731-
case WebAssembly::BI__builtin_wasm_promote_low_f32x4_f64x2: {
17732-
Value *Vec = EmitScalarExpr(E->getArg(0));
17733-
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_promote_low);
17734-
return Builder.CreateCall(Callee, Vec);
17735-
}
1773617731
case WebAssembly::BI__builtin_wasm_load32_zero: {
1773717732
Value *Ptr = EmitScalarExpr(E->getArg(0));
1773817733
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);

clang/lib/Headers/wasm_simd128.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ typedef unsigned short __u16x4
4343
typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
4444
typedef unsigned int __u32x2
4545
__attribute__((__vector_size__(8), __aligned__(8)));
46+
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
4647

4748
#define __DEFAULT_FN_ATTRS \
4849
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -1155,7 +1156,8 @@ wasm_f32x4_demote_f64x2_zero(v128_t __a) {
11551156

11561157
static __inline__ v128_t __DEFAULT_FN_ATTRS
11571158
wasm_f64x2_promote_low_f32x4(v128_t __a) {
1158-
return (v128_t)__builtin_wasm_promote_low_f32x4_f64x2((__f32x4)__a);
1159+
return (v128_t) __builtin_convertvector(
1160+
(__f32x2){((__f32x4)__a)[0], ((__f32x4)__a)[1]}, __f64x2);
11591161
}
11601162

11611163
#define wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \

clang/test/CodeGen/builtins-wasm.c

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -898,12 +898,6 @@ f32x4 wasm_demote_zero_f64x2_f32x4(f64x2 x) {
898898
// WEBASSEMBLY: ret
899899
}
900900

901-
f64x2 wasm_promote_low_f32x4_f64x2(f32x4 x) {
902-
return __builtin_wasm_promote_low_f32x4_f64x2(x);
903-
// WEBASSEMBLY: call <2 x double> @llvm.wasm.promote.low(<4 x float> %x)
904-
// WEBASSEMBLY: ret
905-
}
906-
907901
i32x4 load32_zero(const int *p) {
908902
return __builtin_wasm_load32_zero(p);
909903
// WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)

clang/test/Headers/wasm.c

Lines changed: 77 additions & 76 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -263,13 +263,10 @@ def int_wasm_extadd_pairwise_unsigned :
263263
[LLVMSubdivide2VectorType<0>],
264264
[IntrNoMem, IntrSpeculatable]>;
265265

266-
// TODO: Remove these if possible if they are merged to the spec.
266+
// TODO: Remove this if possible.
267267
def int_wasm_demote_zero :
268268
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
269269
[IntrNoMem, IntrSpeculatable]>;
270-
def int_wasm_promote_low :
271-
Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty],
272-
[IntrNoMem, IntrSpeculatable]>;
273270

274271
//===----------------------------------------------------------------------===//
275272
// Thread-local storage intrinsics

llvm/lib/Target/WebAssembly/WebAssemblyISD.def

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ HANDLE_NODETYPE(EXTEND_HIGH_S)
3737
HANDLE_NODETYPE(EXTEND_HIGH_U)
3838
HANDLE_NODETYPE(CONVERT_LOW_S)
3939
HANDLE_NODETYPE(CONVERT_LOW_U)
40+
HANDLE_NODETYPE(PROMOTE_LOW)
4041
HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
4142
HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
4243
HANDLE_NODETYPE(THROW)

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 89 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -149,9 +149,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
149149
setTargetDAGCombine(ISD::SIGN_EXTEND);
150150
setTargetDAGCombine(ISD::ZERO_EXTEND);
151151

152-
// Combine int_to_fp of extract_vectors and vice versa into conversions ops
152+
// Combine int_to_fp or fp_extend of extract_vectors and vice versa into
153+
// conversion ops
153154
setTargetDAGCombine(ISD::SINT_TO_FP);
154155
setTargetDAGCombine(ISD::UINT_TO_FP);
156+
setTargetDAGCombine(ISD::FP_EXTEND);
155157
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
156158

157159
// Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
@@ -2186,60 +2188,109 @@ performVectorConvertLowCombine(SDNode *N,
21862188
if (ResVT != MVT::v2f64)
21872189
return SDValue();
21882190

2189-
if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
2190-
// Combine this:
2191-
//
2192-
// (v2f64 ({s,u}int_to_fp
2193-
// (v2i32 (extract_subvector (v4i32 $x), 0))))
2194-
//
2195-
// into (f64x2.convert_low_i32x4_{s,u} $x).
2196-
auto Extract = N->getOperand(0);
2197-
if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2198-
return SDValue();
2199-
if (Extract.getValueType() != MVT::v2i32)
2200-
return SDValue();
2201-
auto Source = Extract.getOperand(0);
2202-
if (Source.getValueType() != MVT::v4i32)
2203-
return SDValue();
2204-
auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2205-
if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
2206-
return SDValue();
2207-
2208-
unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
2209-
? WebAssemblyISD::CONVERT_LOW_S
2210-
: WebAssemblyISD::CONVERT_LOW_U;
2211-
2212-
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2191+
auto GetWasmConversionOp = [](unsigned Op) {
2192+
switch (Op) {
2193+
case ISD::SINT_TO_FP:
2194+
return WebAssemblyISD::CONVERT_LOW_S;
2195+
case ISD::UINT_TO_FP:
2196+
return WebAssemblyISD::CONVERT_LOW_U;
2197+
case ISD::FP_EXTEND:
2198+
return WebAssemblyISD::PROMOTE_LOW;
2199+
}
2200+
llvm_unreachable("unexpected op");
2201+
};
22132202

2214-
} else if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
2203+
if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
22152204
// Combine this:
22162205
//
22172206
// (v2f64 (extract_subvector
22182207
// (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
22192208
//
22202209
// into (f64x2.convert_low_i32x4_{s,u} $x).
2221-
auto IntToFP = N->getOperand(0);
2222-
if (IntToFP.getOpcode() != ISD::SINT_TO_FP &&
2223-
IntToFP.getOpcode() != ISD::UINT_TO_FP)
2210+
//
2211+
// Or this:
2212+
//
2213+
// (v2f64 (extract_subvector
2214+
// (v4f64 (fp_extend (v4f32 $x))), 0))
2215+
//
2216+
// into (f64x2.promote_low_f32x4 $x).
2217+
auto Conversion = N->getOperand(0);
2218+
auto ConversionOp = Conversion.getOpcode();
2219+
MVT ExpectedSourceType;
2220+
switch (ConversionOp) {
2221+
case ISD::SINT_TO_FP:
2222+
case ISD::UINT_TO_FP:
2223+
ExpectedSourceType = MVT::v4i32;
2224+
break;
2225+
case ISD::FP_EXTEND:
2226+
ExpectedSourceType = MVT::v4f32;
2227+
break;
2228+
default:
22242229
return SDValue();
2225-
if (IntToFP.getValueType() != MVT::v4f64)
2230+
}
2231+
2232+
if (Conversion.getValueType() != MVT::v4f64)
22262233
return SDValue();
2227-
auto Source = IntToFP.getOperand(0);
2228-
if (Source.getValueType() != MVT::v4i32)
2234+
2235+
auto Source = Conversion.getOperand(0);
2236+
if (Source.getValueType() != ExpectedSourceType)
22292237
return SDValue();
2238+
22302239
auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
22312240
if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
22322241
return SDValue();
22332242

2234-
unsigned Op = IntToFP->getOpcode() == ISD::SINT_TO_FP
2235-
? WebAssemblyISD::CONVERT_LOW_S
2236-
: WebAssemblyISD::CONVERT_LOW_U;
2237-
2243+
auto Op = GetWasmConversionOp(ConversionOp);
22382244
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2245+
}
22392246

2240-
} else {
2247+
// Combine this:
2248+
//
2249+
// (v2f64 ({s,u}int_to_fp
2250+
// (v2i32 (extract_subvector (v4i32 $x), 0))))
2251+
//
2252+
// into (f64x2.convert_low_i32x4_{s,u} $x).
2253+
//
2254+
// Or this:
2255+
//
2256+
// (v2f64 (fp_extend
2257+
// (v2f32 (extract_subvector (v4f32 $x), 0))))
2258+
//
2259+
// into (f64x2.promote_low_f32x4 $x).
2260+
auto ConversionOp = N->getOpcode();
2261+
MVT ExpectedExtractType;
2262+
MVT ExpectedSourceType;
2263+
switch (ConversionOp) {
2264+
case ISD::SINT_TO_FP:
2265+
case ISD::UINT_TO_FP:
2266+
ExpectedExtractType = MVT::v2i32;
2267+
ExpectedSourceType = MVT::v4i32;
2268+
break;
2269+
case ISD::FP_EXTEND:
2270+
ExpectedExtractType = MVT::v2f32;
2271+
ExpectedSourceType = MVT::v4f32;
2272+
break;
2273+
default:
22412274
llvm_unreachable("unexpected opcode");
22422275
}
2276+
2277+
auto Extract = N->getOperand(0);
2278+
if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2279+
return SDValue();
2280+
2281+
if (Extract.getValueType() != ExpectedExtractType)
2282+
return SDValue();
2283+
2284+
auto Source = Extract.getOperand(0);
2285+
if (Source.getValueType() != ExpectedSourceType)
2286+
return SDValue();
2287+
2288+
auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2289+
if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
2290+
return SDValue();
2291+
2292+
unsigned Op = GetWasmConversionOp(ConversionOp);
2293+
return DAG.getNode(Op, SDLoc(N), ResVT, Source);
22432294
}
22442295

22452296
static SDValue
@@ -2298,6 +2349,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
22982349
return performVectorExtendCombine(N, DCI);
22992350
case ISD::SINT_TO_FP:
23002351
case ISD::UINT_TO_FP:
2352+
case ISD::FP_EXTEND:
23012353
case ISD::EXTRACT_SUBVECTOR:
23022354
return performVectorConvertLowCombine(N, DCI);
23032355
case ISD::CONCAT_VECTORS:

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1288,11 +1288,13 @@ defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
12881288
defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
12891289
"extadd_pairwise_i16x8_u", 0x7f>;
12901290

1291-
// Prototype f64x2 conversions
1291+
// f64x2 <-> f32x4 conversions
12921292
defm "" : SIMDConvert<F32x4, F64x2, int_wasm_demote_zero,
12931293
"demote_zero_f64x2", 0x5e>;
1294-
defm "" : SIMDConvert<F64x2, F32x4, int_wasm_promote_low,
1295-
"promote_low_f32x4", 0x5f>;
1294+
1295+
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
1296+
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
1297+
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
12961298

12971299
//===----------------------------------------------------------------------===//
12981300
// Saturating Rounding Q-Format Multiplication

llvm/test/CodeGen/WebAssembly/simd-conversions.ll

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,3 +126,25 @@ define <2 x double> @convert_low_u_v2f64_2(<4 x i32> %x) {
126126
%a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
127127
ret <2 x double> %a
128128
}
129+
130+
; CHECK-LABEL: promote_low_v2f64:
131+
; NO-SIMD128-NOT: f64x2
132+
; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
133+
; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
134+
; SIMD128-NEXT: return $pop[[R]]
135+
define <2 x double> @promote_low_v2f64(<4 x float> %x) {
136+
%v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
137+
%a = fpext <2 x float> %v to <2 x double>
138+
ret <2 x double> %a
139+
}
140+
141+
; CHECK-LABEL: promote_low_v2f64_2:
142+
; NO-SIMD128-NOT: f64x2
143+
; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}}
144+
; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0
145+
; SIMD128-NEXT: return $pop[[R]]
146+
define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
147+
%v = fpext <4 x float> %x to <4 x double>
148+
%a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
149+
ret <2 x double> %a
150+
}

llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -806,13 +806,3 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) {
806806
%v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
807807
ret <2 x double> %v
808808
}
809-
810-
; CHECK-LABEL: promote_low_v2f64:
811-
; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
812-
; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}}
813-
; CHECK-NEXT: return $pop[[R]]{{$}}
814-
declare <2 x double> @llvm.wasm.promote.low(<4 x float>)
815-
define <2 x double> @promote_low_v2f64(<4 x float> %a) {
816-
%v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a)
817-
ret <2 x double> %v
818-
}

0 commit comments

Comments
 (0)