Skip to content

Commit 48882db

Browse files
authored
Fix relationals builtin translation bug (#1962)
According to the OpenCL spec, the relational builtin functions shall return '1' if the specified relation is true for scalar argument types. The functions shall return '-1' (i.e. all bits set) if the specified relation is true for vector argument types.
1 parent 858ca37 commit 48882db

File tree

2 files changed

+137
-5
lines changed

2 files changed

+137
-5
lines changed

lib/SPIRV/OCLToSPIRV.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,11 +1137,14 @@ void OCLToSPIRVBase::visitCallRelational(CallInst *CI,
11371137
OCLSPIRVBuiltinMap::find(DemangledName.str(), &OC);
11381138
// i1 or <N x i1>, depending on whether it returns a vector type.
11391139
Type *BoolTy = CI->getType()->getWithNewType(Type::getInt1Ty(*Ctx));
1140-
mutateCallInst(CI, OC).changeReturnType(BoolTy, [=](IRBuilder<> &Builder,
1141-
CallInst *NewCI) {
1142-
return Builder.CreateSelect(NewCI, Constant::getAllOnesValue(CI->getType()),
1143-
Constant::getNullValue(CI->getType()));
1144-
});
1140+
mutateCallInst(CI, OC).changeReturnType(
1141+
BoolTy, [=](IRBuilder<> &Builder, CallInst *NewCI) {
1142+
Value *TrueOp = CI->getType()->isVectorTy()
1143+
? Constant::getAllOnesValue(CI->getType())
1144+
: getInt32(M, 1);
1145+
return Builder.CreateSelect(NewCI, TrueOp,
1146+
Constant::getNullValue(CI->getType()));
1147+
});
11451148
}
11461149

11471150
void OCLToSPIRVBase::visitCallVecLoadStore(CallInst *CI, StringRef MangledName,
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-spirv %t.bc -o %t.spv
3+
; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.rev.bc
4+
; RUN: llvm-dis < %t.rev.bc | FileCheck %s
5+
6+
; This test checks the following relational builtins with scalar and vector types
7+
8+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
9+
target triple = "spir64"
10+
11+
; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
12+
declare spir_func i32 @_Z8isfinitef(float) local_unnamed_addr #1
13+
14+
; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
15+
declare spir_func i32 @_Z5isinff(float) local_unnamed_addr #1
16+
17+
; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
18+
declare spir_func i32 @_Z5isnanf(float) local_unnamed_addr #1
19+
20+
; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
21+
declare spir_func i32 @_Z8isnormalf(float) local_unnamed_addr #1
22+
23+
; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
24+
declare spir_func i32 @_Z7signbitf(float) local_unnamed_addr #1
25+
26+
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
27+
declare spir_func <4 x i32> @_Z8isnormalDv4_f(<4 x float> noundef) local_unnamed_addr #1
28+
29+
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
30+
declare spir_func <4 x i32> @_Z8isfiniteDv4_f(<4 x float> noundef) local_unnamed_addr #1
31+
32+
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
33+
declare spir_func <4 x i32> @_Z5isnanDv4_f(<4 x float> noundef) local_unnamed_addr #1
34+
35+
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
36+
declare spir_func <4 x i32> @_Z5isinfDv4_f(<4 x float> noundef) local_unnamed_addr #1
37+
38+
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
39+
declare spir_func <4 x i32> @_Z7signbitDv4_f(<4 x float> noundef) local_unnamed_addr #1
40+
41+
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite)
42+
define dso_local spir_kernel void @math_kernel_scalar(i32 addrspace(4)* nocapture writeonly %out, float %f) local_unnamed_addr #0 {
43+
entry:
44+
; CHECK: [[DATA0:%.*]] = call spir_func i32 @_Z8isfinitef(float [[ARG0:%.*]])
45+
; CHECK-NEXT: [[DATA1:%.*]] = trunc i32 [[DATA0]] to i1
46+
; CHECK-NEXT: [[CALL0:%.*]] = select i1 [[DATA1]], i32 1, i32 0
47+
%call = tail call spir_func i32 @_Z8isfinitef(float %f) #2
48+
49+
; CHECK: [[DATA2:%.*]] = call spir_func i32 @_Z5isinff(float [[ARG0]])
50+
; CHECK-NEXT: [[DATA3:%.*]] = trunc i32 [[DATA2]] to i1
51+
; CHECK-NEXT: [[CALL1:%.*]] = select i1 [[DATA3]], i32 1, i32 0
52+
%call1 = tail call spir_func i32 @_Z5isinff(float %f) #2
53+
%add = add nsw i32 %call1, %call
54+
55+
; CHECK: [[DATA4:%.*]] = call spir_func i32 @_Z5isnanf(float [[ARG0]])
56+
; CHECK-NEXT: [[DATA5:%.*]] = trunc i32 [[DATA4]] to i1
57+
; CHECK-NEXT: [[CALL2:%.*]] = select i1 [[DATA5]], i32 1, i32 0
58+
%call2 = tail call spir_func i32 @_Z5isnanf(float %f) #2
59+
%add3 = add nsw i32 %add, %call2
60+
61+
; CHECK: [[DATA6:%.*]] = call spir_func i32 @_Z8isnormalf(float [[ARG0]])
62+
; CHECK-NEXT: [[DATA7:%.*]] = trunc i32 [[DATA6]] to i1
63+
; CHECK-NEXT: [[CALL3:%.*]] = select i1 [[DATA7]], i32 1, i32 0
64+
%call4 = tail call spir_func i32 @_Z8isnormalf(float %f) #2
65+
%add5 = add nsw i32 %add3, %call4
66+
67+
; CHECK: [[DATA8:%.*]] = call spir_func i32 @_Z7signbitf(float [[ARG0]])
68+
; CHECK-NEXT: [[DATA9:%.*]] = trunc i32 [[DATA8]] to i1
69+
; CHECK-NEXT: [[CALL4:%.*]] = select i1 [[DATA9]], i32 1, i32 0
70+
%call6 = tail call spir_func i32 @_Z7signbitf(float %f) #2
71+
%add7 = add nsw i32 %add5, %call6
72+
73+
%arg1 = alloca <4 x float>, align 16
74+
%v = load <4 x float>, <4 x float>* %arg1, align 16
75+
; CHECK: [[DATA10:%.*]] = call spir_func <4 x i32> @_Z8isnormalDv4_f(<4 x float> [[ARG1:%.*]]) #0
76+
; CHECK-NEXT: [[DATA11:%.*]] = trunc <4 x i32> [[DATA10]] to <4 x i8>
77+
; CHECK-NEXT: [[DATA12:%.*]] = trunc <4 x i8> [[DATA11]] to <4 x i1>
78+
; CHECK-NEXT: [[CALL5:%.*]] = select <4 x i1> [[DATA12]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
79+
%call7 = tail call spir_func <4 x i32> @_Z8isnormalDv4_f(<4 x float> noundef %v) #2
80+
81+
; CHECK: [[DATA13:%.*]] = call spir_func <4 x i32> @_Z8isfiniteDv4_f(<4 x float> [[ARG1]]) #0
82+
; CHECK-NEXT: [[DATA14:%.*]] = trunc <4 x i32> [[DATA13]] to <4 x i8>
83+
; CHECK-NEXT: [[DATA15:%.*]] = trunc <4 x i8> [[DATA14]] to <4 x i1>
84+
; CHECK-NEXT: [[CALL6:%.*]] = select <4 x i1> [[DATA15]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
85+
%call8 = tail call spir_func <4 x i32> @_Z8isfiniteDv4_f(<4 x float> noundef %v) #2
86+
87+
; CHECK: [[DATA16:%.*]] = call spir_func <4 x i32> @_Z5isnanDv4_f(<4 x float> [[ARG1]]) #0
88+
; CHECK-NEXT: [[DATA17:%.*]] = trunc <4 x i32> [[DATA16]] to <4 x i8>
89+
; CHECK-NEXT: [[DATA18:%.*]] = trunc <4 x i8> [[DATA17]] to <4 x i1>
90+
; CHECK-NEXT: [[CALL7:%.*]] = select <4 x i1> [[DATA18]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
91+
%call9 = tail call spir_func <4 x i32> @_Z5isnanDv4_f(<4 x float> noundef %v) #2
92+
93+
; CHECK: [[DATA19:%.*]] = call spir_func <4 x i32> @_Z5isinfDv4_f(<4 x float> [[ARG1]]) #0
94+
; CHECK-NEXT: [[DATA20:%.*]] = trunc <4 x i32> [[DATA19]] to <4 x i8>
95+
; CHECK-NEXT: [[DATA21:%.*]] = trunc <4 x i8> [[DATA20]] to <4 x i1>
96+
; CHECK-NEXT: [[CALL8:%.*]] = select <4 x i1> [[DATA21]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
97+
%call10 = tail call spir_func <4 x i32> @_Z5isinfDv4_f(<4 x float> noundef %v) #2
98+
99+
; CHECK: [[DATA22:%.*]] = call spir_func <4 x i32> @_Z7signbitDv4_f(<4 x float> [[ARG1]]) #0
100+
; CHECK-NEXT: [[DATA23:%.*]] = trunc <4 x i32> [[DATA22]] to <4 x i8>
101+
; CHECK-NEXT: [[DATA24:%.*]] = trunc <4 x i8> [[DATA23]] to <4 x i1>
102+
; CHECK-NEXT: [[CALL9:%.*]] = select <4 x i1> [[DATA24]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
103+
%call11 = tail call spir_func <4 x i32> @_Z7signbitDv4_f(<4 x float> noundef %v) #2
104+
ret void
105+
}
106+
107+
attributes #0 = { convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
108+
attributes #1 = { convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
109+
attributes #2 = { convergent nounwind willreturn memory(none) }
110+
111+
!llvm.module.flags = !{!0}
112+
!opencl.ocl.version = !{!1}
113+
!opencl.spir.version = !{!1}
114+
!llvm.ident = !{!2}
115+
116+
!0 = !{i32 1, !"wchar_size", i32 4}
117+
!1 = !{i32 3, i32 0}
118+
!2 = !{!"clang version 16.0.0"}
119+
!3 = !{i32 1, i32 1}
120+
!4 = !{!"none", !"none"}
121+
!5 = !{!"long*", !"double*"}
122+
!6 = !{!"", !""}
123+
!7 = !{!8, !8, i64 0}
124+
!8 = !{!"double", !9, i64 0}
125+
!9 = !{!"omnipotent char", !10, i64 0}
126+
!10 = !{!"Simple C/C++ TBAA"}
127+
!11 = !{!12, !12, i64 0}
128+
!12 = !{!"long", !9, i64 0}
129+

0 commit comments

Comments
 (0)