Skip to content

Commit 7298ae3

Browse files
authored
[clang][CodeGen] Fix in codegen for __builtin_popcountg/ctzg/clzg (#90845)
Make sure that the result of the ctpop/ctlz/cttz intrinsics is zero-extended (cast as an unsigned value) to int, rather than sign-extended, when expanding the __builtin_popcountg/__builtin_ctzg/__builtin_clzg builtins. For example,

    unsigned _BitInt(1) x = ...;
    int y = __builtin_popcountg(x);

was previously incorrectly expanded to

    %1 = call i1 @llvm.ctpop.i1(i1 %0)
    %cast = sext i1 %1 to i32

Since the input type is generic for the "g" versions of the builtins, the intrinsic call may return a value whose sign bit is set (this typically happens for _BitInt of size 1 and 2). So we need to emit a zext rather than a sext to avoid negative results.
1 parent b24aeef commit 7298ae3

File tree

3 files changed

+142
-16
lines changed

3 files changed

+142
-16
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3239,8 +3239,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
32393239
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
32403240
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
32413241
if (Result->getType() != ResultType)
3242-
Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3243-
"cast");
3242+
Result =
3243+
Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
32443244
if (!HasFallback)
32453245
return RValue::get(Result);
32463246

@@ -3271,8 +3271,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
32713271
Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
32723272
Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
32733273
if (Result->getType() != ResultType)
3274-
Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3275-
"cast");
3274+
Result =
3275+
Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
32763276
if (!HasFallback)
32773277
return RValue::get(Result);
32783278

@@ -3351,8 +3351,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
33513351
llvm::Type *ResultType = ConvertType(E->getType());
33523352
Value *Result = Builder.CreateCall(F, ArgValue);
33533353
if (Result->getType() != ResultType)
3354-
Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3355-
"cast");
3354+
Result =
3355+
Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
33563356
return RValue::get(Result);
33573357
}
33583358
case Builtin::BI__builtin_unpredictable: {

clang/test/CodeGen/builtins-bitint.c

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2+
// RUN: %clang_cc1 -triple arm-unknown-unknown -O0 -std=c23 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-O0
3+
// RUN: %clang_cc1 -triple arm-unknown-unknown -O1 -std=c23 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-O1
4+
5+
// Verify that the result from the intrinsic call is zero extended to avoid that
6+
// we get a negative result from popcountg/ctzg/clzg.
7+
8+
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi1(
9+
// CHECK-O0-SAME: ) #[[ATTR0:[0-9]+]] {
10+
// CHECK-O0-NEXT: entry:
11+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
12+
// CHECK-O0-NEXT: store i1 true, ptr [[A]], align 1
13+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
14+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctpop.i1(i1 [[TMP0]])
15+
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
16+
// CHECK-O0-NEXT: ret i32 [[CAST]]
17+
//
18+
// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_popcountg_ubi1(
19+
// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
20+
// CHECK-O1-NEXT: entry:
21+
// CHECK-O1-NEXT: ret i32 1
22+
//
23+
int test_popcountg_ubi1() {
24+
unsigned _BitInt(1) a = 1uwb;
25+
return __builtin_popcountg(a);
26+
}
27+
28+
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_popcountg_ubi2(
29+
// CHECK-O0-SAME: ) #[[ATTR0]] {
30+
// CHECK-O0-NEXT: entry:
31+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
32+
// CHECK-O0-NEXT: store i2 -1, ptr [[A]], align 1
33+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
34+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctpop.i2(i2 [[TMP0]])
35+
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
36+
// CHECK-O0-NEXT: ret i32 [[CAST]]
37+
//
38+
// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_popcountg_ubi2(
39+
// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
40+
// CHECK-O1-NEXT: entry:
41+
// CHECK-O1-NEXT: ret i32 2
42+
//
43+
int test_popcountg_ubi2() {
44+
unsigned _BitInt(2) a = 3uwb;
45+
return __builtin_popcountg(a);
46+
}
47+
48+
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi1(
49+
// CHECK-O0-SAME: ) #[[ATTR0]] {
50+
// CHECK-O0-NEXT: entry:
51+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
52+
// CHECK-O0-NEXT: store i1 false, ptr [[A]], align 1
53+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
54+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.cttz.i1(i1 [[TMP0]], i1 false)
55+
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
56+
// CHECK-O0-NEXT: ret i32 [[CAST]]
57+
//
58+
// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_ctzg_ubi1(
59+
// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
60+
// CHECK-O1-NEXT: entry:
61+
// CHECK-O1-NEXT: ret i32 1
62+
//
63+
int test_ctzg_ubi1() {
64+
unsigned _BitInt(1) a = 0uwb;
65+
return __builtin_ctzg(a);
66+
}
67+
68+
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_ctzg_ubi2(
69+
// CHECK-O0-SAME: ) #[[ATTR0]] {
70+
// CHECK-O0-NEXT: entry:
71+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
72+
// CHECK-O0-NEXT: store i2 0, ptr [[A]], align 1
73+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
74+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.cttz.i2(i2 [[TMP0]], i1 false)
75+
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
76+
// CHECK-O0-NEXT: ret i32 [[CAST]]
77+
//
78+
// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_ctzg_ubi2(
79+
// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
80+
// CHECK-O1-NEXT: entry:
81+
// CHECK-O1-NEXT: ret i32 2
82+
//
83+
int test_ctzg_ubi2() {
84+
unsigned _BitInt(2) a = 0uwb;
85+
return __builtin_ctzg(a);
86+
}
87+
88+
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi1(
89+
// CHECK-O0-SAME: ) #[[ATTR0]] {
90+
// CHECK-O0-NEXT: entry:
91+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i1, align 1
92+
// CHECK-O0-NEXT: store i1 false, ptr [[A]], align 1
93+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i1, ptr [[A]], align 1
94+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i1 @llvm.ctlz.i1(i1 [[TMP0]], i1 false)
95+
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i1 [[TMP1]] to i32
96+
// CHECK-O0-NEXT: ret i32 [[CAST]]
97+
//
98+
// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_clzg_ubi1(
99+
// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
100+
// CHECK-O1-NEXT: entry:
101+
// CHECK-O1-NEXT: ret i32 1
102+
//
103+
int test_clzg_ubi1() {
104+
unsigned _BitInt(1) a = 0uwb;
105+
return __builtin_clzg(a);
106+
}
107+
108+
// CHECK-O0-LABEL: define dso_local arm_aapcscc i32 @test_clzg_ubi2(
109+
// CHECK-O0-SAME: ) #[[ATTR0]] {
110+
// CHECK-O0-NEXT: entry:
111+
// CHECK-O0-NEXT: [[A:%.*]] = alloca i2, align 1
112+
// CHECK-O0-NEXT: store i2 0, ptr [[A]], align 1
113+
// CHECK-O0-NEXT: [[TMP0:%.*]] = load i2, ptr [[A]], align 1
114+
// CHECK-O0-NEXT: [[TMP1:%.*]] = call i2 @llvm.ctlz.i2(i2 [[TMP0]], i1 false)
115+
// CHECK-O0-NEXT: [[CAST:%.*]] = zext i2 [[TMP1]] to i32
116+
// CHECK-O0-NEXT: ret i32 [[CAST]]
117+
//
118+
// CHECK-O1-LABEL: define dso_local arm_aapcscc noundef i32 @test_clzg_ubi2(
119+
// CHECK-O1-SAME: ) local_unnamed_addr #[[ATTR0]] {
120+
// CHECK-O1-NEXT: entry:
121+
// CHECK-O1-NEXT: ret i32 2
122+
//
123+
int test_clzg_ubi2() {
124+
unsigned _BitInt(2) a = 0uwb;
125+
return __builtin_clzg(a);
126+
}

clang/test/CodeGen/builtins.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -949,12 +949,12 @@ void test_builtin_popcountg(unsigned char uc, unsigned short us,
949949
pop = __builtin_popcountg(uc);
950950
// CHECK: %1 = load i8, ptr %uc.addr, align 1
951951
// CHECK-NEXT: %2 = call i8 @llvm.ctpop.i8(i8 %1)
952-
// CHECK-NEXT: %cast = sext i8 %2 to i32
952+
// CHECK-NEXT: %cast = zext i8 %2 to i32
953953
// CHECK-NEXT: store volatile i32 %cast, ptr %pop, align 4
954954
pop = __builtin_popcountg(us);
955955
// CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
956956
// CHECK-NEXT: %4 = call i16 @llvm.ctpop.i16(i16 %3)
957-
// CHECK-NEXT: %cast1 = sext i16 %4 to i32
957+
// CHECK-NEXT: %cast1 = zext i16 %4 to i32
958958
// CHECK-NEXT: store volatile i32 %cast1, ptr %pop, align 4
959959
pop = __builtin_popcountg(ui);
960960
// CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
@@ -992,12 +992,12 @@ void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
992992
lz = __builtin_clzg(uc);
993993
// CHECK: %1 = load i8, ptr %uc.addr, align 1
994994
// CHECK-NEXT: %2 = call i8 @llvm.ctlz.i8(i8 %1, i1 true)
995-
// CHECK-NEXT: %cast = sext i8 %2 to i32
995+
// CHECK-NEXT: %cast = zext i8 %2 to i32
996996
// CHECK-NEXT: store volatile i32 %cast, ptr %lz, align 4
997997
lz = __builtin_clzg(us);
998998
// CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
999999
// CHECK-NEXT: %4 = call i16 @llvm.ctlz.i16(i16 %3, i1 true)
1000-
// CHECK-NEXT: %cast1 = sext i16 %4 to i32
1000+
// CHECK-NEXT: %cast1 = zext i16 %4 to i32
10011001
// CHECK-NEXT: store volatile i32 %cast1, ptr %lz, align 4
10021002
lz = __builtin_clzg(ui);
10031003
// CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
@@ -1026,7 +1026,7 @@ void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
10261026
lz = __builtin_clzg(uc, sc);
10271027
// CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
10281028
// CHECK-NEXT: %16 = call i8 @llvm.ctlz.i8(i8 %15, i1 true)
1029-
// CHECK-NEXT: %cast6 = sext i8 %16 to i32
1029+
// CHECK-NEXT: %cast6 = zext i8 %16 to i32
10301030
// CHECK-NEXT: %iszero = icmp eq i8 %15, 0
10311031
// CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
10321032
// CHECK-NEXT: %conv = sext i8 %17 to i32
@@ -1035,7 +1035,7 @@ void test_builtin_clzg(unsigned char uc, unsigned short us, unsigned int ui,
10351035
lz = __builtin_clzg(us, uc);
10361036
// CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
10371037
// CHECK-NEXT: %19 = call i16 @llvm.ctlz.i16(i16 %18, i1 true)
1038-
// CHECK-NEXT: %cast7 = sext i16 %19 to i32
1038+
// CHECK-NEXT: %cast7 = zext i16 %19 to i32
10391039
// CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
10401040
// CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
10411041
// CHECK-NEXT: %conv9 = zext i8 %20 to i32
@@ -1094,12 +1094,12 @@ void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
10941094
tz = __builtin_ctzg(uc);
10951095
// CHECK: %1 = load i8, ptr %uc.addr, align 1
10961096
// CHECK-NEXT: %2 = call i8 @llvm.cttz.i8(i8 %1, i1 true)
1097-
// CHECK-NEXT: %cast = sext i8 %2 to i32
1097+
// CHECK-NEXT: %cast = zext i8 %2 to i32
10981098
// CHECK-NEXT: store volatile i32 %cast, ptr %tz, align 4
10991099
tz = __builtin_ctzg(us);
11001100
// CHECK-NEXT: %3 = load i16, ptr %us.addr, align 2
11011101
// CHECK-NEXT: %4 = call i16 @llvm.cttz.i16(i16 %3, i1 true)
1102-
// CHECK-NEXT: %cast1 = sext i16 %4 to i32
1102+
// CHECK-NEXT: %cast1 = zext i16 %4 to i32
11031103
// CHECK-NEXT: store volatile i32 %cast1, ptr %tz, align 4
11041104
tz = __builtin_ctzg(ui);
11051105
// CHECK-NEXT: %5 = load i32, ptr %ui.addr, align 4
@@ -1128,7 +1128,7 @@ void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
11281128
tz = __builtin_ctzg(uc, sc);
11291129
// CHECK-NEXT: %15 = load i8, ptr %uc.addr, align 1
11301130
// CHECK-NEXT: %16 = call i8 @llvm.cttz.i8(i8 %15, i1 true)
1131-
// CHECK-NEXT: %cast6 = sext i8 %16 to i32
1131+
// CHECK-NEXT: %cast6 = zext i8 %16 to i32
11321132
// CHECK-NEXT: %iszero = icmp eq i8 %15, 0
11331133
// CHECK-NEXT: %17 = load i8, ptr %sc.addr, align 1
11341134
// CHECK-NEXT: %conv = sext i8 %17 to i32
@@ -1137,7 +1137,7 @@ void test_builtin_ctzg(unsigned char uc, unsigned short us, unsigned int ui,
11371137
tz = __builtin_ctzg(us, uc);
11381138
// CHECK-NEXT: %18 = load i16, ptr %us.addr, align 2
11391139
// CHECK-NEXT: %19 = call i16 @llvm.cttz.i16(i16 %18, i1 true)
1140-
// CHECK-NEXT: %cast7 = sext i16 %19 to i32
1140+
// CHECK-NEXT: %cast7 = zext i16 %19 to i32
11411141
// CHECK-NEXT: %iszero8 = icmp eq i16 %18, 0
11421142
// CHECK-NEXT: %20 = load i8, ptr %uc.addr, align 1
11431143
// CHECK-NEXT: %conv9 = zext i8 %20 to i32

0 commit comments

Comments
 (0)