Skip to content

Commit 7e32606

Browse files
Concrete SIMD operations, part 1 (#36172)
Adds concrete overloads of the following SIMD operations: - Comparisons: .==, .!=, .<, .<=, .>, .>= - Logical operations on masks: .!, .&, .^, .| - Integer arithmetic: &+, &-, &*, &+=, &-=, &*= This makes some simple benchmarks 10-100x faster, which is basically a no-brainer, while staying away from the most heavily used operators, so hopefully it doesn't impact compilation performance too badly.
1 parent da4b28d commit 7e32606

8 files changed

+504
-4
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ set(SWIFTLIB_SOURCES
225225

226226
set(SWIFTLIB_GYB_SOURCES
227227
${SWIFTLIB_ESSENTIAL_GYB_SOURCES}
228+
SIMDConcreteOperations.swift.gyb
228229
SIMDVectorTypes.swift.gyb
229230
Tuple.swift.gyb
230231
)

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@
169169
"FloatingPointTypes.swift",
170170
"FloatingPointRandom.swift"],
171171
"Vector": [
172+
"SIMDConcreteOperations.swift",
172173
"SIMDVector.swift",
173174
"SIMDVectorTypes.swift"]}
174175
],
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
//===--- SIMDConcreteOperations.swift -------------------------*- swift -*-===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
%{
14+
from __future__ import division
15+
from SwiftIntTypes import all_integer_types
16+
word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
17+
storagescalarCounts = [2,4,8,16,32,64]
18+
vectorscalarCounts = storagescalarCounts + [3]
19+
}%
20+
21+
%for int in all_integer_types(word_bits):
22+
% Scalar = int.stdlib_name
23+
% for n in vectorscalarCounts:
24+
% Vector = "SIMD" + str(n) + "<" + Scalar + ">"
25+
% storageN = 4 if n == 3 else n
26+
% s = "s" if int.is_signed else "u"
27+
% Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
28+
% if int.is_signed:
29+
// Concrete mask operations for one specific signed-integer storage vector.
// Each logical operator forwards to a single bitwise vector builtin applied
// to the builtin value held by the mask's storage.
extension SIMDMask where Storage == ${Vector} {
30+
/// Creates a mask from a builtin vector of 1-bit lanes.
///
/// Each 1-bit lane is widened with the `sext` (sign-extension) builtin to the
/// lane width of the storage vector before being stored.
@_alwaysEmitIntoClient
31+
internal init(_ _builtin: Builtin.Vec${storageN}xInt1) {
32+
_storage = ${Vector}(Builtin.sext_Vec${storageN}xInt1_${Builtin}(_builtin))
33+
}
34+
35+
/// A mask obtained by comparing a default-initialized storage vector with
/// itself for pointwise equality.
///
/// Used by `.!` as the second operand of an exclusive or.
@_alwaysEmitIntoClient
36+
internal static var allTrue: Self {
37+
let zero = ${Vector}()
38+
return zero .== zero
39+
}
40+
41+
/// A vector mask that is the boolean negation of the input.
42+
@_alwaysEmitIntoClient
43+
public static prefix func .!(a: Self) -> Self {
44+
// Negation is expressed as an exclusive or against `allTrue`.
a .^ .allTrue
45+
}
46+
47+
/// A vector mask that is the boolean conjunction of the inputs.
48+
@_alwaysEmitIntoClient
49+
public static func .&(a: Self, b: Self) -> Self {
50+
// Two `_storage` hops: the mask's storage vector, then that vector's own
// storage, whose `_value` is the builtin vector the `and` builtin takes.
Self(${Vector}(Builtin.and_${Builtin}(
51+
a._storage._storage._value,
52+
b._storage._storage._value
53+
)))
54+
}
55+
56+
/// A vector mask that is the exclusive or of the inputs.
57+
@_alwaysEmitIntoClient
58+
public static func .^(a: Self, b: Self) -> Self {
59+
// Same unwrapping as `.&`, forwarded to the `xor` builtin.
Self(${Vector}(Builtin.xor_${Builtin}(
60+
a._storage._storage._value,
61+
b._storage._storage._value
62+
)))
63+
}
64+
65+
/// A vector mask that is the boolean disjunction of the inputs.
66+
@_alwaysEmitIntoClient
67+
public static func .|(a: Self, b: Self) -> Self {
68+
// Same unwrapping as `.&`, forwarded to the `or` builtin.
Self(${Vector}(Builtin.or_${Builtin}(
69+
a._storage._storage._value,
70+
b._storage._storage._value
71+
)))
72+
}
73+
}
74+
75+
% end
76+
// Concrete overloads for one fixed-width integer vector type. Comparisons and
// wrapping arithmetic each forward to a single vector builtin applied to the
// builtin values held in the operands' storage.
extension SIMD${n} where Scalar == ${Scalar} {
77+
/// Creates a vector whose storage wraps the given builtin vector value.
@_alwaysEmitIntoClient
78+
internal init(_ _builtin: Builtin.${Builtin}) {
79+
_storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
80+
}
81+
82+
/// A vector mask with the result of a pointwise equality comparison.
83+
@_alwaysEmitIntoClient
84+
public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
85+
SIMDMask<MaskStorage>(
86+
Builtin.cmp_eq_${Builtin}(a._storage._value, b._storage._value)
87+
)
88+
}
89+
90+
/// A vector mask with the result of a pointwise inequality comparison.
91+
@_alwaysEmitIntoClient
92+
public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
93+
SIMDMask<MaskStorage>(
94+
Builtin.cmp_ne_${Builtin}(a._storage._value, b._storage._value)
95+
)
96+
}
97+
98+
/// A vector mask with the result of a pointwise less-than comparison.
99+
@_alwaysEmitIntoClient
100+
public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
101+
// The template splices `s` or `u` into the builtin name according to the
// signedness of the scalar type, selecting the signed or unsigned
// comparison. The three ordered comparisons below do the same.
SIMDMask<MaskStorage>(
102+
Builtin.cmp_${s}lt_${Builtin}(a._storage._value, b._storage._value)
103+
)
104+
}
105+
106+
/// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
107+
@_alwaysEmitIntoClient
108+
public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
109+
SIMDMask<MaskStorage>(
110+
Builtin.cmp_${s}le_${Builtin}(a._storage._value, b._storage._value)
111+
)
112+
}
113+
114+
/// A vector mask with the result of a pointwise greater-than comparison.
115+
@_alwaysEmitIntoClient
116+
public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
117+
SIMDMask<MaskStorage>(
118+
Builtin.cmp_${s}gt_${Builtin}(a._storage._value, b._storage._value)
119+
)
120+
}
121+
122+
/// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
123+
@_alwaysEmitIntoClient
124+
public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
125+
SIMDMask<MaskStorage>(
126+
Builtin.cmp_${s}ge_${Builtin}(a._storage._value, b._storage._value)
127+
)
128+
}
129+
130+
/// The wrapping sum of two vectors.
131+
@_alwaysEmitIntoClient
132+
public static func &+(a: Self, b: Self) -> Self {
133+
Self(Builtin.add_${Builtin}(a._storage._value, b._storage._value))
134+
}
135+
136+
/// The wrapping difference of two vectors.
137+
@_alwaysEmitIntoClient
138+
public static func &-(a: Self, b: Self) -> Self {
139+
Self(Builtin.sub_${Builtin}(a._storage._value, b._storage._value))
140+
}
141+
142+
/// The pointwise wrapping product of two vectors.
143+
@_alwaysEmitIntoClient
144+
public static func &*(a: Self, b: Self) -> Self {
145+
Self(Builtin.mul_${Builtin}(a._storage._value, b._storage._value))
146+
}
147+
148+
/// Updates the left-hand side with the wrapping sum of the two
149+
/// vectors.
150+
@_alwaysEmitIntoClient
151+
public static func &+=(a: inout Self, b: Self) { a = a &+ b }
152+
153+
/// Updates the left-hand side with the wrapping difference of the two
154+
/// vectors.
155+
@_alwaysEmitIntoClient
156+
public static func &-=(a: inout Self, b: Self) { a = a &- b }
157+
158+
/// Updates the left-hand side with the pointwise wrapping product of the two
159+
/// vectors.
160+
@_alwaysEmitIntoClient
161+
public static func &*=(a: inout Self, b: Self) { a = a &* b }
162+
}
163+
164+
% end
165+
%end
166+
167+
%for (Scalar, bits) in [('Float16',16), ('Float',32), ('Double',64)]:
168+
% for n in vectorscalarCounts:
169+
% Vector = "SIMD" + str(n) + "<" + Scalar + ">"
170+
% storageN = 4 if n == 3 else n
171+
% Builtin = "Vec" + str(storageN) + "xFPIEEE" + str(bits)
172+
% if bits == 16:
173+
#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
174+
@available(macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0, *)
175+
% end
176+
// Concrete comparison overloads for one floating-point vector type. Each
// operator forwards to a single `fcmp` vector builtin applied to the builtin
// values held in the operands' storage.
extension SIMD${n} where Scalar == ${Scalar} {
177+
/// Creates a vector whose storage wraps the given builtin vector value.
@_alwaysEmitIntoClient
178+
internal init(_ _builtin: Builtin.${Builtin}) {
179+
_storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
180+
}
181+
182+
/// A vector mask with the result of a pointwise equality comparison.
183+
@_alwaysEmitIntoClient
184+
public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
185+
// `oeq` is the ordered-equal predicate; per the LLVM `fcmp` definition an
// ordered predicate is false for a lane where either operand is NaN.
SIMDMask<MaskStorage>(
186+
Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
187+
)
188+
}
189+
190+
/// A vector mask with the result of a pointwise inequality comparison.
191+
@_alwaysEmitIntoClient
192+
public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
193+
// `une` (unordered or not equal) is the complement of `oeq`, so a lane
// holding NaN compares as not equal.
SIMDMask<MaskStorage>(
194+
Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
195+
)
196+
}
197+
198+
/// A vector mask with the result of a pointwise less-than comparison.
199+
@_alwaysEmitIntoClient
200+
public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
201+
// This and the remaining comparisons use ordered predicates
// (`olt`, `ole`, `ogt`, `oge`).
SIMDMask<MaskStorage>(
202+
Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
203+
)
204+
}
205+
206+
/// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
207+
@_alwaysEmitIntoClient
208+
public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
209+
SIMDMask<MaskStorage>(
210+
Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
211+
)
212+
}
213+
214+
/// A vector mask with the result of a pointwise greater-than comparison.
215+
@_alwaysEmitIntoClient
216+
public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
217+
SIMDMask<MaskStorage>(
218+
Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
219+
)
220+
}
221+
222+
/// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
223+
@_alwaysEmitIntoClient
224+
public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
225+
SIMDMask<MaskStorage>(
226+
Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
227+
)
228+
}
229+
}
230+
% if bits == 16:
231+
#endif
232+
% end
233+
234+
% end
235+
%end

stdlib/public/core/SIMDVector.swift

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ extension SIMD {
152152
}
153153
}
154154

155-
/// Returns a vector mask with the result of a pointwise equality comparison.
155+
/// A vector mask with the result of a pointwise equality comparison.
156156
@_transparent
157157
public static func .==(lhs: Self, rhs: Self) -> SIMDMask<MaskStorage> {
158158
var result = SIMDMask<MaskStorage>()
@@ -822,26 +822,28 @@ extension SIMD where Scalar: FloatingPoint {
822822
return result
823823
}
824824

825+
/// A vector formed by rounding each lane of the source vector to an integral
826+
/// value according to the specified rounding `rule`.
///
/// - Parameter rule: The rounding rule applied to every lane.
/// - Returns: A new vector holding the rounded lanes; the receiver is not
///   modified.
825827
@_transparent
826828
public func rounded(_ rule: FloatingPointRoundingRule) -> Self {
827829
var result = Self()
828830
// Each lane is rounded independently through the scalar `rounded(_:)`.
for i in result.indices { result[i] = self[i].rounded(rule) }
829831
return result
830832
}
831833

832-
/// Returns the least scalar in the vector.
834+
/// The least scalar in the vector.
///
/// Lanes are folded with `Scalar.minimum`, using lane 0 as the initial value.
833835
@_alwaysEmitIntoClient
834836
public func min() -> Scalar {
835837
return indices.reduce(into: self[0]) { $0 = Scalar.minimum($0, self[$1]) }
836838
}
837839

838-
/// Returns the greatest scalar in the vector.
840+
/// The greatest scalar in the vector.
///
/// Lanes are folded with `Scalar.maximum`, using lane 0 as the initial value.
839841
@_alwaysEmitIntoClient
840842
public func max() -> Scalar {
841843
return indices.reduce(into: self[0]) { $0 = Scalar.maximum($0, self[$1]) }
842844
}
843845

844-
/// Returns the sum of the scalars in the vector.
846+
/// The sum of the scalars in the vector.
845847
@_alwaysEmitIntoClient
846848
public func sum() -> Scalar {
847849
// Implementation note: this will eventually be defined to lower to either

stdlib/public/core/SIMDVectorTypes.swift.gyb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,11 @@ extension ${Self}: SIMDScalar {
234234
public init() {
235235
_value = Builtin.zeroInitializer()
236236
}
237+
238+
/// Creates a storage value that holds the given builtin integer vector
/// directly, with no conversion.
@_alwaysEmitIntoClient
239+
internal init(_ _builtin: Builtin.Vec${n}x${BuiltinName}) {
240+
_value = _builtin
241+
}
237242

238243
public subscript(index: Int) -> ${Self} {
239244
@_transparent
@@ -283,6 +288,11 @@ extension ${Self} : SIMDScalar {
283288
public init() {
284289
_value = Builtin.zeroInitializer()
285290
}
291+
292+
/// Creates a storage value that holds the given builtin floating-point vector
/// directly, with no conversion.
@_alwaysEmitIntoClient
293+
internal init(_ _builtin: Builtin.Vec${n}xFPIEEE${bits}) {
294+
_value = _builtin
295+
}
286296

287297
public subscript(index: Int) -> ${Self} {
288298
@_transparent

0 commit comments

Comments
 (0)