Skip to content

Commit e657c84

Browse files
committed
[WebAssembly] Use v128.const instead of splats for constants
We previously used splats instead of v128.const to materialize vector constants because V8 did not support v128.const. Now that V8 supports v128.const, we can use v128.const instead. Although this increases code size, it should also increase performance (or at least require fewer engine-side optimizations), so it is an appropriate change to make.

Differential Revision: https://reviews.llvm.org/D100716
1 parent 6c5b0d6 commit e657c84

File tree

4 files changed

+29
-47
lines changed

4 files changed

+29
-47
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1160,20 +1160,26 @@ defm "" : SIMDNarrow<I32x4, 133>;
11601160
// Use narrowing operations for truncating stores. Since the narrowing
11611161
// operations are saturating instead of truncating, we need to mask
11621162
// the stored values first.
1163-
// TODO: Use consts instead of splats
11641163
def store_v8i8_trunc_v8i16 :
11651164
OutPatFrag<(ops node:$val),
11661165
(EXTRACT_LANE_I64x2
11671166
(NARROW_U_I8x16
1168-
(AND (SPLAT_I32x4 (CONST_I32 0x00ff00ff)), node:$val),
1167+
(AND
1168+
(CONST_V128_I16x8
1169+
0x00ff, 0x00ff, 0x00ff, 0x00ff,
1170+
0x00ff, 0x00ff, 0x00ff, 0x00ff),
1171+
node:$val),
11691172
$val), // Unused input
11701173
0)>;
11711174

11721175
def store_v4i16_trunc_v4i32 :
11731176
OutPatFrag<(ops node:$val),
11741177
(EXTRACT_LANE_I64x2
11751178
(NARROW_U_I16x8
1176-
(AND (SPLAT_I32x4 (CONST_I32 0x0000ffff)), node:$val),
1179+
(AND
1180+
(CONST_V128_I32x4
1181+
0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff),
1182+
node:$val),
11771183
$val), // Unused input
11781184
0)>;
11791185

llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -121,14 +121,9 @@ static void convertImplicitDefToConstZero(MachineInstr *MI,
121121
Type::getDoubleTy(MF.getFunction().getContext())));
122122
MI->addOperand(MachineOperand::CreateFPImm(Val));
123123
} else if (RegClass == &WebAssembly::V128RegClass) {
124-
// TODO: Replace this with v128.const 0 once that is supported in V8
125-
Register TempReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
126-
MI->setDesc(TII->get(WebAssembly::SPLAT_I32x4));
127-
MI->addOperand(MachineOperand::CreateReg(TempReg, false));
128-
MachineInstr *Const = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
129-
TII->get(WebAssembly::CONST_I32), TempReg)
130-
.addImm(0);
131-
LIS.InsertMachineInstrInMaps(*Const);
124+
MI->setDesc(TII->get(WebAssembly::CONST_V128_I64x2));
125+
MI->addOperand(MachineOperand::CreateImm(0));
126+
MI->addOperand(MachineOperand::CreateImm(0));
132127
} else {
133128
llvm_unreachable("Unexpected reg class");
134129
}

llvm/test/CodeGen/WebAssembly/implicit-def.ll

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -97,10 +97,7 @@ X: ; preds = %0, C
9797
}
9898

9999
; CHECK-LABEL: implicit_def_v4i32:
100-
; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}}
101-
; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}}
102-
; CHECK: i32.const $push[[L0:[0-9]+]]=, 0{{$}}
103-
; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L0]]
100+
; CHECK: v128.const $push[[R:[0-9]+]]=, 0, 0{{$}}
104101
; CHECK-NEXT: return $pop[[R]]{{$}}
105102
define <4 x i32> @implicit_def_v4i32() {
106103
br i1 undef, label %A, label %X

llvm/test/CodeGen/WebAssembly/simd-offset.ll

Lines changed: 16 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -923,8 +923,7 @@ define void @store_narrowing_v8i16(<8 x i8> %v, <8 x i8>* %p) {
923923
; CHECK: .functype store_narrowing_v8i16 (v128, i32) -> ()
924924
; CHECK-NEXT: # %bb.0:
925925
; CHECK-NEXT: local.get 1
926-
; CHECK-NEXT: i32.const 16711935
927-
; CHECK-NEXT: i32x4.splat
926+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
928927
; CHECK-NEXT: local.get 0
929928
; CHECK-NEXT: v128.and
930929
; CHECK-NEXT: local.get 0
@@ -956,8 +955,7 @@ define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, <8 x i8>* %p)
956955
; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
957956
; CHECK-NEXT: # %bb.0:
958957
; CHECK-NEXT: local.get 1
959-
; CHECK-NEXT: i32.const 16711935
960-
; CHECK-NEXT: i32x4.splat
958+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
961959
; CHECK-NEXT: local.get 0
962960
; CHECK-NEXT: v128.and
963961
; CHECK-NEXT: local.get 0
@@ -990,8 +988,7 @@ define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, <8 x i8>*
990988
; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
991989
; CHECK-NEXT: # %bb.0:
992990
; CHECK-NEXT: local.get 1
993-
; CHECK-NEXT: i32.const 16711935
994-
; CHECK-NEXT: i32x4.splat
991+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
995992
; CHECK-NEXT: local.get 0
996993
; CHECK-NEXT: v128.and
997994
; CHECK-NEXT: local.get 0
@@ -1026,8 +1023,7 @@ define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v
10261023
; CHECK-NEXT: local.get 1
10271024
; CHECK-NEXT: i32.const -8
10281025
; CHECK-NEXT: i32.add
1029-
; CHECK-NEXT: i32.const 16711935
1030-
; CHECK-NEXT: i32x4.splat
1026+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
10311027
; CHECK-NEXT: local.get 0
10321028
; CHECK-NEXT: v128.and
10331029
; CHECK-NEXT: local.get 0
@@ -1064,8 +1060,7 @@ define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, <8 x i8>* %
10641060
; CHECK-NEXT: local.get 1
10651061
; CHECK-NEXT: i32.const 16
10661062
; CHECK-NEXT: i32.add
1067-
; CHECK-NEXT: i32.const 16711935
1068-
; CHECK-NEXT: i32x4.splat
1063+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
10691064
; CHECK-NEXT: local.get 0
10701065
; CHECK-NEXT: v128.and
10711066
; CHECK-NEXT: local.get 0
@@ -1102,8 +1097,7 @@ define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, <8 x i8
11021097
; CHECK-NEXT: local.get 1
11031098
; CHECK-NEXT: i32.const 8
11041099
; CHECK-NEXT: i32.add
1105-
; CHECK-NEXT: i32.const 16711935
1106-
; CHECK-NEXT: i32x4.splat
1100+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
11071101
; CHECK-NEXT: local.get 0
11081102
; CHECK-NEXT: v128.and
11091103
; CHECK-NEXT: local.get 0
@@ -1134,8 +1128,7 @@ define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, <8 x i8>* %p)
11341128
; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
11351129
; CHECK-NEXT: # %bb.0:
11361130
; CHECK-NEXT: i32.const 0
1137-
; CHECK-NEXT: i32.const 16711935
1138-
; CHECK-NEXT: i32x4.splat
1131+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
11391132
; CHECK-NEXT: local.get 0
11401133
; CHECK-NEXT: v128.and
11411134
; CHECK-NEXT: local.get 0
@@ -1165,8 +1158,7 @@ define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
11651158
; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> ()
11661159
; CHECK-NEXT: # %bb.0:
11671160
; CHECK-NEXT: i32.const 0
1168-
; CHECK-NEXT: i32.const 16711935
1169-
; CHECK-NEXT: i32x4.splat
1161+
; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255
11701162
; CHECK-NEXT: local.get 0
11711163
; CHECK-NEXT: v128.and
11721164
; CHECK-NEXT: local.get 0
@@ -1753,8 +1745,7 @@ define void @store_narrowing_v4i32(<4 x i16> %v, <4 x i16>* %p) {
17531745
; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> ()
17541746
; CHECK-NEXT: # %bb.0:
17551747
; CHECK-NEXT: local.get 1
1756-
; CHECK-NEXT: i32.const 65535
1757-
; CHECK-NEXT: i32x4.splat
1748+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
17581749
; CHECK-NEXT: local.get 0
17591750
; CHECK-NEXT: v128.and
17601751
; CHECK-NEXT: local.get 0
@@ -1786,8 +1777,7 @@ define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, <4 x i16>* %
17861777
; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
17871778
; CHECK-NEXT: # %bb.0:
17881779
; CHECK-NEXT: local.get 1
1789-
; CHECK-NEXT: i32.const 65535
1790-
; CHECK-NEXT: i32x4.splat
1780+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
17911781
; CHECK-NEXT: local.get 0
17921782
; CHECK-NEXT: v128.and
17931783
; CHECK-NEXT: local.get 0
@@ -1820,8 +1810,7 @@ define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, <4 x i16
18201810
; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
18211811
; CHECK-NEXT: # %bb.0:
18221812
; CHECK-NEXT: local.get 1
1823-
; CHECK-NEXT: i32.const 65535
1824-
; CHECK-NEXT: i32x4.splat
1813+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
18251814
; CHECK-NEXT: local.get 0
18261815
; CHECK-NEXT: v128.and
18271816
; CHECK-NEXT: local.get 0
@@ -1856,8 +1845,7 @@ define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %
18561845
; CHECK-NEXT: local.get 1
18571846
; CHECK-NEXT: i32.const -8
18581847
; CHECK-NEXT: i32.add
1859-
; CHECK-NEXT: i32.const 65535
1860-
; CHECK-NEXT: i32x4.splat
1848+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
18611849
; CHECK-NEXT: local.get 0
18621850
; CHECK-NEXT: v128.and
18631851
; CHECK-NEXT: local.get 0
@@ -1894,8 +1882,7 @@ define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, <4 x i16>*
18941882
; CHECK-NEXT: local.get 1
18951883
; CHECK-NEXT: i32.const 16
18961884
; CHECK-NEXT: i32.add
1897-
; CHECK-NEXT: i32.const 65535
1898-
; CHECK-NEXT: i32x4.splat
1885+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
18991886
; CHECK-NEXT: local.get 0
19001887
; CHECK-NEXT: v128.and
19011888
; CHECK-NEXT: local.get 0
@@ -1932,8 +1919,7 @@ define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, <4 x i
19321919
; CHECK-NEXT: local.get 1
19331920
; CHECK-NEXT: i32.const 8
19341921
; CHECK-NEXT: i32.add
1935-
; CHECK-NEXT: i32.const 65535
1936-
; CHECK-NEXT: i32x4.splat
1922+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
19371923
; CHECK-NEXT: local.get 0
19381924
; CHECK-NEXT: v128.and
19391925
; CHECK-NEXT: local.get 0
@@ -1964,8 +1950,7 @@ define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
19641950
; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
19651951
; CHECK-NEXT: # %bb.0:
19661952
; CHECK-NEXT: i32.const 0
1967-
; CHECK-NEXT: i32.const 65535
1968-
; CHECK-NEXT: i32x4.splat
1953+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
19691954
; CHECK-NEXT: local.get 0
19701955
; CHECK-NEXT: v128.and
19711956
; CHECK-NEXT: local.get 0
@@ -1995,8 +1980,7 @@ define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
19951980
; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> ()
19961981
; CHECK-NEXT: # %bb.0:
19971982
; CHECK-NEXT: i32.const 0
1998-
; CHECK-NEXT: i32.const 65535
1999-
; CHECK-NEXT: i32x4.splat
1983+
; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
20001984
; CHECK-NEXT: local.get 0
20011985
; CHECK-NEXT: v128.and
20021986
; CHECK-NEXT: local.get 0

0 commit comments

Comments
 (0)