Skip to content

Commit 12d9d6f

Browse files
authored
[ESIMD] Fix ESIMDOptimizeVecArgCallConv to cover more cases, add test. (#6873)
Unwrap sret memory type before comparison, to make vector type and a wrapper struct around the vector type equivalent. Follow-up for #6835 Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent 91a4cba commit 12d9d6f

File tree

2 files changed

+122
-64
lines changed

2 files changed

+122
-64
lines changed

llvm/lib/SYCLLowerIR/ESIMD/ESIMDOptimizeVecArgCallConv.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,28 @@ static bool isESIMDVectorTy(Type *T) {
4141
esimd::getVectorTyOrNull(dyn_cast<StructType>(T)) != nullptr;
4242
}
4343

44+
static Type *getVectorTypeOrNull(Type *T) {
45+
if (!T || T->isVectorTy()) {
46+
return T;
47+
}
48+
Type *Res = esimd::getVectorTyOrNull(dyn_cast<StructType>(T));
49+
return Res;
50+
}
51+
52+
// Checks types equivalence for the purpose if this optimization.
53+
// Thin struct wrapper types around a vector type are equivalent between them
54+
// and the vector type.
55+
static bool eq(Type *T1, Type *T2) {
56+
if (T1 == T2) {
57+
return true;
58+
}
59+
if (Type *T1V = getVectorTypeOrNull(T1)) {
60+
return T1V == getVectorTypeOrNull(T2);
61+
} else {
62+
return false;
63+
}
64+
}
65+
4466
using NonMemUseHandlerT = std::function<bool(const Use *)>;
4567

4668
static Type *
@@ -64,7 +86,7 @@ getMemTypeIfSameAddressLoadsStores(SmallPtrSetImpl<const Use *> &Uses,
6486
ContentT = LI->getType();
6587
LoadMet = 1;
6688
continue;
67-
} else if (ContentT != LI->getType()) {
89+
} else if (!eq(ContentT, LI->getType())) {
6890
return nullptr;
6991
}
7092
}
@@ -78,7 +100,7 @@ getMemTypeIfSameAddressLoadsStores(SmallPtrSetImpl<const Use *> &Uses,
78100
ContentT = SI->getValueOperand()->getType();
79101
StoreMet = 1;
80102
continue;
81-
} else if (ContentT != SI->getValueOperand()->getType()) {
103+
} else if (!eq(ContentT, SI->getValueOperand()->getType())) {
82104
return nullptr;
83105
}
84106
}
@@ -88,7 +110,7 @@ getMemTypeIfSameAddressLoadsStores(SmallPtrSetImpl<const Use *> &Uses,
88110
return nullptr;
89111
}
90112
}
91-
return ContentT;
113+
return getVectorTypeOrNull(ContentT);
92114
}
93115

94116
static bool isSretParam(const Argument &P) {
@@ -152,8 +174,7 @@ Type *getPointedToTypeIfOptimizeable(const Argument &FormalParam) {
152174
const bool NonOptimizeableParam = !IsSret && (StoreMet || !LoadMet);
153175

154176
if (IsSret) {
155-
Type *ST = FormalParam.getParamStructRetType();
156-
Type *SretVecT = esimd::getVectorTyOrNull(dyn_cast<StructType>(ST));
177+
Type *SretVecT = getVectorTypeOrNull(FormalParam.getParamStructRetType());
157178

158179
if (!ContentT) {
159180
// Can happen when sret param is a "fall through" - return value is

llvm/test/SYCLLowerIR/ESIMD/vec_arg_call_conv.ll

Lines changed: 96 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,38 @@ target triple = "spir64-unknown-unknown"
1818

1919
@GRF = dso_local global %"class.sycl::_V1::ext::intel::esimd::simd.0" zeroinitializer, align 2048
2020

21+
; // Compilation: clang++ -fsycl -Xclang -opaque-pointers src.cpp
22+
; // Template for the source:
23+
;
24+
; #include <sycl/ext/intel/esimd.hpp>
25+
;
26+
; using namespace sycl::ext::intel::esimd;
27+
;
28+
; ESIMD_PRIVATE simd<float, 3 * 32 * 4> GRF;
29+
; #define V(x, w, i) (x).template select<w, 1>(i)
30+
;
31+
; // insert testcases here
32+
;
33+
; int main() {
34+
; return 0;
35+
; }
36+
2137
;----- Test1: "Fall-through case", incoming optimizeable parameter is just returned
38+
; __attribute__((noinline))
2239
; SYCL_EXTERNAL simd<float, 16> callee__sret__param(simd<float, 16> x) SYCL_ESIMD_FUNCTION {
2340
; return x;
2441
; }
42+
;
43+
; __attribute__((noinline))
44+
; SYCL_EXTERNAL simd<float, 16> test__sret__fall_through__arr(simd<float, 16> *x, int i) SYCL_ESIMD_FUNCTION {
45+
; return callee__sret__param(x[i]);
46+
; }
47+
;
48+
; __attribute__((noinline))
49+
; SYCL_EXTERNAL simd<float, 16> test__sret__fall_through__glob() SYCL_ESIMD_FUNCTION {
50+
; return callee__sret__param(V(GRF, 16, 0));
51+
; }
52+
;
2553
; Function Attrs: convergent noinline norecurse
2654
define dso_local spir_func void @_Z19callee__sret__param(ptr addrspace(4) noalias sret(%"class.sycl::_V1::ext::intel::esimd::simd") align 64 %agg.result, ptr noundef %x) local_unnamed_addr #0 !sycl_explicit_simd !8 !intel_reqd_sub_group_size !9 {
2755
; CHECK: define dso_local spir_func <16 x float> @_Z19callee__sret__param(<16 x float> %[[PARAM:.+]])
@@ -86,6 +114,16 @@ entry:
86114
; Check only signatures and calls in testcases below.
87115

88116
;----- Test2: Optimized parameter interleaves non-optimizeable ones.
117+
; __attribute__((noinline))
118+
; SYCL_EXTERNAL simd<int, 8> callee__sret__x_param_x(int i, simd<int, 8> x, int j) SYCL_ESIMD_FUNCTION {
119+
; return x + (i + j);
120+
; }
121+
;
122+
; __attribute__((noinline))
123+
; SYCL_EXTERNAL simd<int, 8> test__sret__x_param_x(simd<int, 8> x) SYCL_ESIMD_FUNCTION {
124+
; return callee__sret__x_param_x(2, x, 1);
125+
; }
126+
;
89127
; Function Attrs: convergent noinline norecurse
90128
define dso_local spir_func void @_Z23callee__sret__x_param_x(ptr addrspace(4) noalias sret(%"class.sycl::_V1::ext::intel::esimd::simd.2") align 32 %agg.result, i32 noundef %i, ptr noundef %x, i32 noundef %j) local_unnamed_addr #3 !sycl_explicit_simd !8 !intel_reqd_sub_group_size !9 {
91129
; CHECK: define dso_local spir_func <8 x i32> @_Z23callee__sret__x_param_x(i32 noundef %{{.*}}, <8 x i32> %{{.*}}, i32 noundef %{{.*}})
@@ -116,6 +154,21 @@ entry:
116154
}
117155

118156
;----- Test3: "2-level fall through", bottom-level callee
157+
; __attribute__((noinline))
158+
; SYCL_EXTERNAL simd<double, 32> callee__all_fall_through0(simd<double, 32> x) SYCL_ESIMD_FUNCTION {
159+
; return x;
160+
; }
161+
;
162+
; __attribute__((noinline))
163+
; SYCL_EXTERNAL simd<double, 32> callee__all_fall_through1(simd<double, 32> x) SYCL_ESIMD_FUNCTION {
164+
; return callee__all_fall_through0(x);
165+
; }
166+
;
167+
; __attribute__((noinline))
168+
; SYCL_EXTERNAL simd<double, 32> test__all_fall_through(simd<double, 32> x) SYCL_ESIMD_FUNCTION {
169+
; return callee__all_fall_through1(x);
170+
; }
171+
;
119172
; Function Attrs: convergent noinline norecurse
120173
define dso_local spir_func void @_Z25callee__all_fall_through0(ptr addrspace(4) noalias sret(%"class.sycl::_V1::ext::intel::esimd::simd.4") align 256 %agg.result, ptr noundef %x) local_unnamed_addr #5 !sycl_explicit_simd !8 !intel_reqd_sub_group_size !9 {
121174
; CHECK: define dso_local spir_func <32 x double> @_Z25callee__all_fall_through0(<32 x double> %{{.*}})
@@ -159,6 +212,49 @@ entry:
159212
; Function Attrs: alwaysinline nounwind readnone
160213
declare !genx_intrinsic_id !10 <16 x float> @llvm.genx.rdregionf.v16f32.v384f32.i16(<384 x float>, i32, i32, i32, i16, i32) #6
161214

215+
%"class.sycl::_V1::ext::intel::esimd::simd.6" = type { %"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl.6" }
216+
%"class.sycl::_V1::ext::intel::esimd::detail::simd_obj_impl.6" = type { <8 x i32> }
217+
218+
;----- Test4. First argument is passed by reference and updated in the callee,
219+
; must not be optimized.
220+
; __attribute__((noinline))
221+
; SYCL_EXTERNAL void callee_void__noopt_opt(simd<int, 8> &x, simd<int, 8> y) SYCL_ESIMD_FUNCTION {
222+
; x = x + y;
223+
; }
224+
;
225+
; __attribute__((noinline))
226+
; SYCL_EXTERNAL simd<int, 8> test__sret__noopt_opt(simd<int, 8> x) SYCL_ESIMD_FUNCTION {
227+
; callee_void__noopt_opt(x, x);
228+
; return x;
229+
; }
230+
;
231+
232+
define dso_local spir_func void @_Z22callee_void__noopt_opt(ptr addrspace(4) noundef %x, ptr noundef %y) !sycl_explicit_simd !8 !intel_reqd_sub_group_size !9 {
233+
; CHECK: define dso_local spir_func void @_Z22callee_void__noopt_opt(ptr addrspace(4) noundef %{{.*}}, <8 x i32> %{{.*}})
234+
entry:
235+
%y.ascast = addrspacecast ptr %y to ptr addrspace(4)
236+
%call.i.i1 = load <8 x i32>, ptr addrspace(4) %x, align 32
237+
%call.i5.i2 = load <8 x i32>, ptr addrspace(4) %y.ascast, align 32
238+
%add.i.i.i.i = add <8 x i32> %call.i.i1, %call.i5.i2
239+
store <8 x i32> %add.i.i.i.i, ptr addrspace(4) %x, align 32
240+
ret void
241+
}
242+
243+
define dso_local spir_func void @_Z21test__sret__noopt_opt(ptr addrspace(4) noalias sret(%"class.sycl::_V1::ext::intel::esimd::simd.6") align 32 %agg.result, ptr noundef %x) !sycl_explicit_simd !8 !intel_reqd_sub_group_size !9 {
244+
; CHECK: define dso_local spir_func <8 x i32> @_Z21test__sret__noopt_opt(ptr noundef %{{.*}})
245+
entry:
246+
%agg.tmp = alloca %"class.sycl::_V1::ext::intel::esimd::simd.6", align 32
247+
%agg.tmp.ascast = addrspacecast ptr %agg.tmp to ptr addrspace(4)
248+
%x.ascast = addrspacecast ptr %x to ptr addrspace(4)
249+
%call.i.i.i2 = load <8 x i32>, ptr addrspace(4) %x.ascast, align 32
250+
store <8 x i32> %call.i.i.i2, ptr addrspace(4) %agg.tmp.ascast, align 32
251+
call spir_func void @_Z22callee_void__noopt_opt(ptr addrspace(4) noundef align 32 dereferenceable(32) %x.ascast, ptr noundef nonnull %agg.tmp) #5
252+
; CHECK: call spir_func void @_Z22callee_void__noopt_opt(ptr addrspace(4) %{{.*}}, <8 x i32> %{{.*}})
253+
%call.i.i.i13 = load <8 x i32>, ptr addrspace(4) %x.ascast, align 32
254+
store <8 x i32> %call.i.i.i13, ptr addrspace(4) %agg.result, align 32
255+
ret void
256+
}
257+
162258
attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "min-legal-vector-width"="512" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../opaque_ptr.cpp" }
163259
attributes #1 = { alwaysinline convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
164260
attributes #2 = { convergent noinline norecurse "frame-pointer"="all" "min-legal-vector-width"="12288" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../opaque_ptr.cpp" }
@@ -188,62 +284,3 @@ attributes #7 = { convergent }
188284
!8 = !{}
189285
!9 = !{i32 1}
190286
!10 = !{i32 11881}
191-
192-
;------------------------------------------------------------------------------
193-
; Source is given below.
194-
; Compilation: clang++ -fsycl -Xclang -opaque-pointers src.cpp
195-
;
196-
; #include <sycl/ext/intel/esimd.hpp>
197-
;
198-
; using namespace sycl::ext::intel::esimd;
199-
;
200-
; ESIMD_PRIVATE simd<float, 3 * 32 * 4> GRF;
201-
; #define V(x, w, i) (x).template select<w, 1>(i)
202-
;
203-
; //------------------------
204-
;
205-
; __attribute__((noinline))
206-
; SYCL_EXTERNAL simd<float, 16> callee__sret__param(simd<float, 16> x) SYCL_ESIMD_FUNCTION {
207-
; return x;
208-
; }
209-
;
210-
; __attribute__((noinline))
211-
; SYCL_EXTERNAL simd<float, 16> test__sret__fall_through__arr(simd<float, 16> *x, int i) SYCL_ESIMD_FUNCTION {
212-
; return callee__sret__param(x[i]);
213-
; }
214-
;
215-
; __attribute__((noinline))
216-
; SYCL_EXTERNAL simd<float, 16> test__sret__fall_through__glob() SYCL_ESIMD_FUNCTION {
217-
; return callee__sret__param(V(GRF, 16, 0));
218-
; }
219-
;
220-
; //------------------------
221-
;
222-
; __attribute__((noinline))
223-
; SYCL_EXTERNAL simd<int, 8> callee__sret__x_param_x(int i, simd<int, 8> x, int j) SYCL_ESIMD_FUNCTION {
224-
; return x + (i + j);
225-
; }
226-
;
227-
; __attribute__((noinline))
228-
; SYCL_EXTERNAL simd<int, 8> test__sret__x_param_x(simd<int, 8> x) SYCL_ESIMD_FUNCTION {
229-
; return callee__sret__x_param_x(2, x, 1);
230-
; }
231-
; //------------------------
232-
; __attribute__((noinline))
233-
; SYCL_EXTERNAL simd<double, 32> callee__all_fall_through0(simd<double, 32> x) SYCL_ESIMD_FUNCTION {
234-
; return x;
235-
; }
236-
;
237-
; __attribute__((noinline))
238-
; SYCL_EXTERNAL simd<double, 32> callee__all_fall_through1(simd<double, 32> x) SYCL_ESIMD_FUNCTION {
239-
; return callee__all_fall_through0(x);
240-
; }
241-
;
242-
; __attribute__((noinline))
243-
; SYCL_EXTERNAL simd<double, 32> test__all_fall_through(simd<double, 32> x) SYCL_ESIMD_FUNCTION {
244-
; return callee__all_fall_through1(x);
245-
; }
246-
;
247-
; int main() {
248-
; return 0;
249-
; }

0 commit comments

Comments
 (0)