Skip to content

Commit f58cfac

Browse files
authored
[AggressiveInstCombine] Expand memchr with small constant strings (#98501)
This patch converts a memchr call on a small constant string into a switch. This reduces the overhead of the libcall and enables more folds (e.g., comparing the result with null). References: https://en.cppreference.com/w/c/string/byte/memchr
1 parent 47f3d55 commit f58cfac

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ static cl::opt<unsigned> StrNCmpInlineThreshold(
5454
cl::desc("The maximum length of a constant string for a builtin string cmp "
5555
"call eligible for inlining. The default value is 3."));
5656

57+
/// Hidden command-line option "memchr-inline-threshold" (initial value 3).
/// foldMemChr refuses to expand a memchr call whose constant length is larger
/// than this value.
static cl::opt<unsigned>
58+
MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
59+
cl::desc("The maximum length of a constant string to "
60+
"inline a memchr call."));
61+
5762
/// Match a pattern for a bitwise funnel/rotate operation that partially guards
5863
/// against undefined behavior by branching around the funnel-shift/rotation
5964
/// when the shift amount is 0.
@@ -1103,6 +1108,81 @@ void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
11031108
}
11041109
}
11051110

1111+
/// Convert memchr with a small constant string into a switch
1112+
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
1113+
const DataLayout &DL) {
1114+
if (isa<Constant>(Call->getArgOperand(1)))
1115+
return false;
1116+
1117+
StringRef Str;
1118+
Value *Base = Call->getArgOperand(0);
1119+
if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1120+
return false;
1121+
1122+
uint64_t N = Str.size();
1123+
if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1124+
uint64_t Val = ConstInt->getZExtValue();
1125+
// Ignore the case that n is larger than the size of string.
1126+
if (Val > N)
1127+
return false;
1128+
N = Val;
1129+
} else
1130+
return false;
1131+
1132+
if (N > MemChrInlineThreshold)
1133+
return false;
1134+
1135+
BasicBlock *BB = Call->getParent();
1136+
BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1137+
IRBuilder<> IRB(BB);
1138+
IntegerType *ByteTy = IRB.getInt8Ty();
1139+
BB->getTerminator()->eraseFromParent();
1140+
SwitchInst *SI = IRB.CreateSwitch(
1141+
IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1142+
Type *IndexTy = DL.getIndexType(Call->getType());
1143+
SmallVector<DominatorTree::UpdateType, 8> Updates;
1144+
1145+
BasicBlock *BBSuccess = BasicBlock::Create(
1146+
Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1147+
IRB.SetInsertPoint(BBSuccess);
1148+
PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1149+
Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1150+
IRB.CreateBr(BBNext);
1151+
if (DTU)
1152+
Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1153+
1154+
SmallPtrSet<ConstantInt *, 4> Cases;
1155+
for (uint64_t I = 0; I < N; ++I) {
1156+
ConstantInt *CaseVal = ConstantInt::get(ByteTy, Str[I]);
1157+
if (!Cases.insert(CaseVal).second)
1158+
continue;
1159+
1160+
BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1161+
BB->getParent(), BBSuccess);
1162+
SI->addCase(CaseVal, BBCase);
1163+
IRB.SetInsertPoint(BBCase);
1164+
IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1165+
IRB.CreateBr(BBSuccess);
1166+
if (DTU) {
1167+
Updates.push_back({DominatorTree::Insert, BB, BBCase});
1168+
Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1169+
}
1170+
}
1171+
1172+
PHINode *PHI =
1173+
PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1174+
PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1175+
PHI->addIncoming(FirstOccursLocation, BBSuccess);
1176+
1177+
Call->replaceAllUsesWith(PHI);
1178+
Call->eraseFromParent();
1179+
1180+
if (DTU)
1181+
DTU->applyUpdates(Updates);
1182+
1183+
return true;
1184+
}
1185+
11061186
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
11071187
TargetLibraryInfo &TLI, AssumptionCache &AC,
11081188
DominatorTree &DT, const DataLayout &DL,
@@ -1135,6 +1215,12 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
11351215
return true;
11361216
}
11371217
break;
1218+
case LibFunc_memchr:
1219+
if (foldMemChr(CI, &DTU, DL)) {
1220+
MadeCFGChange = true;
1221+
return true;
1222+
}
1223+
break;
11381224
default:;
11391225
}
11401226
return false;
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=aggressive-instcombine --memchr-inline-threshold=5 < %s | FileCheck %s
3+
4+
@str = constant [5 x i8] c"01\002\00", align 1
5+
@str_long = constant [8 x i8] c"0123456\00", align 1
6+
7+
declare ptr @memchr(ptr, i32, i64)
8+
9+
; Positive test: memchr over all 5 bytes of @str with a variable character,
; where the result is only compared against null. The expected output is a
; switch with one case per distinct byte (48, 49, 0, 50) instead of the call.
define i1 @test_memchr_null(i32 %x) {
10+
; CHECK-LABEL: define i1 @test_memchr_null(
11+
; CHECK-SAME: i32 [[X:%.*]]) {
12+
; CHECK-NEXT: [[ENTRY:.*]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[X]] to i8
14+
; CHECK-NEXT: switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
15+
; CHECK-NEXT: i8 48, label %[[MEMCHR_CASE:.*]]
16+
; CHECK-NEXT: i8 49, label %[[MEMCHR_CASE1:.*]]
17+
; CHECK-NEXT: i8 0, label %[[MEMCHR_CASE2:.*]]
18+
; CHECK-NEXT: i8 50, label %[[MEMCHR_CASE3:.*]]
19+
; CHECK-NEXT: ]
20+
; CHECK: [[MEMCHR_CASE]]:
21+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS:.*]]
22+
; CHECK: [[MEMCHR_CASE1]]:
23+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
24+
; CHECK: [[MEMCHR_CASE2]]:
25+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
26+
; CHECK: [[MEMCHR_CASE3]]:
27+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
28+
; CHECK: [[MEMCHR_SUCCESS]]:
29+
; CHECK-NEXT: [[MEMCHR_IDX:%.*]] = phi i64 [ 0, %[[MEMCHR_CASE]] ], [ 1, %[[MEMCHR_CASE1]] ], [ 2, %[[MEMCHR_CASE2]] ], [ 3, %[[MEMCHR_CASE3]] ]
30+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @str, i64 [[MEMCHR_IDX]]
31+
; CHECK-NEXT: br label %[[ENTRY_SPLIT]]
32+
; CHECK: [[ENTRY_SPLIT]]:
33+
; CHECK-NEXT: [[MEMCHR4:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[TMP1]], %[[MEMCHR_SUCCESS]] ]
34+
; CHECK-NEXT: [[ISNULL:%.*]] = icmp eq ptr [[MEMCHR4]], null
35+
; CHECK-NEXT: ret i1 [[ISNULL]]
36+
;
37+
entry:
38+
%memchr = call ptr @memchr(ptr @str, i32 %x, i64 5)
39+
%isnull = icmp eq ptr %memchr, null
40+
ret i1 %isnull
41+
}
42+
43+
; Positive test: memchr over all 5 bytes of @str with a variable character,
; returning the pointer itself. The second NUL byte of @str (index 4) gets no
; case of its own; only the first occurrence (index 2) appears in the index phi.
define ptr @test_memchr(i32 %x) {
44+
; CHECK-LABEL: define ptr @test_memchr(
45+
; CHECK-SAME: i32 [[X:%.*]]) {
46+
; CHECK-NEXT: [[ENTRY:.*]]:
47+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[X]] to i8
48+
; CHECK-NEXT: switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
49+
; CHECK-NEXT: i8 48, label %[[MEMCHR_CASE:.*]]
50+
; CHECK-NEXT: i8 49, label %[[MEMCHR_CASE1:.*]]
51+
; CHECK-NEXT: i8 0, label %[[MEMCHR_CASE2:.*]]
52+
; CHECK-NEXT: i8 50, label %[[MEMCHR_CASE3:.*]]
53+
; CHECK-NEXT: ]
54+
; CHECK: [[MEMCHR_CASE]]:
55+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS:.*]]
56+
; CHECK: [[MEMCHR_CASE1]]:
57+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
58+
; CHECK: [[MEMCHR_CASE2]]:
59+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
60+
; CHECK: [[MEMCHR_CASE3]]:
61+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
62+
; CHECK: [[MEMCHR_SUCCESS]]:
63+
; CHECK-NEXT: [[MEMCHR_IDX:%.*]] = phi i64 [ 0, %[[MEMCHR_CASE]] ], [ 1, %[[MEMCHR_CASE1]] ], [ 2, %[[MEMCHR_CASE2]] ], [ 3, %[[MEMCHR_CASE3]] ]
64+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @str, i64 [[MEMCHR_IDX]]
65+
; CHECK-NEXT: br label %[[ENTRY_SPLIT]]
66+
; CHECK: [[ENTRY_SPLIT]]:
67+
; CHECK-NEXT: [[MEMCHR4:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[TMP1]], %[[MEMCHR_SUCCESS]] ]
68+
; CHECK-NEXT: ret ptr [[MEMCHR4]]
69+
;
70+
entry:
71+
%memchr = call ptr @memchr(ptr @str, i32 %x, i64 5)
72+
ret ptr %memchr
73+
}
74+
75+
; Positive test: the length (3) is smaller than the 5-byte @str, so only the
; first three bytes (48, 49, 0) are expected to become switch cases.
define ptr @test_memchr_smaller_n(i32 %x) {
76+
; CHECK-LABEL: define ptr @test_memchr_smaller_n(
77+
; CHECK-SAME: i32 [[X:%.*]]) {
78+
; CHECK-NEXT: [[ENTRY:.*]]:
79+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[X]] to i8
80+
; CHECK-NEXT: switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
81+
; CHECK-NEXT: i8 48, label %[[MEMCHR_CASE:.*]]
82+
; CHECK-NEXT: i8 49, label %[[MEMCHR_CASE1:.*]]
83+
; CHECK-NEXT: i8 0, label %[[MEMCHR_CASE2:.*]]
84+
; CHECK-NEXT: ]
85+
; CHECK: [[MEMCHR_CASE]]:
86+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS:.*]]
87+
; CHECK: [[MEMCHR_CASE1]]:
88+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
89+
; CHECK: [[MEMCHR_CASE2]]:
90+
; CHECK-NEXT: br label %[[MEMCHR_SUCCESS]]
91+
; CHECK: [[MEMCHR_SUCCESS]]:
92+
; CHECK-NEXT: [[MEMCHR_IDX:%.*]] = phi i64 [ 0, %[[MEMCHR_CASE]] ], [ 1, %[[MEMCHR_CASE1]] ], [ 2, %[[MEMCHR_CASE2]] ]
93+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @str, i64 [[MEMCHR_IDX]]
94+
; CHECK-NEXT: br label %[[ENTRY_SPLIT]]
95+
; CHECK: [[ENTRY_SPLIT]]:
96+
; CHECK-NEXT: [[MEMCHR3:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[TMP1]], %[[MEMCHR_SUCCESS]] ]
97+
; CHECK-NEXT: ret ptr [[MEMCHR3]]
98+
;
99+
entry:
100+
%memchr = call ptr @memchr(ptr @str, i32 %x, i64 3)
101+
ret ptr %memchr
102+
}
103+
104+
; negative tests
105+
106+
; Negative test: the length (6) is larger than the 5-byte @str, so the call is
; expected to be left unchanged.
define ptr @test_memchr_larger_n(i32 %x) {
107+
; CHECK-LABEL: define ptr @test_memchr_larger_n(
108+
; CHECK-SAME: i32 [[X:%.*]]) {
109+
; CHECK-NEXT: [[ENTRY:.*:]]
110+
; CHECK-NEXT: [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i64 6)
111+
; CHECK-NEXT: ret ptr [[MEMCHR]]
112+
;
113+
entry:
114+
%memchr = call ptr @memchr(ptr @str, i32 %x, i64 6)
115+
ret ptr %memchr
116+
}
117+
118+
; Negative test: the searched buffer is a function argument rather than a
; constant string, so the call is expected to be left unchanged.
define ptr @test_memchr_non_constant(i32 %x, ptr %str) {
119+
; CHECK-LABEL: define ptr @test_memchr_non_constant(
120+
; CHECK-SAME: i32 [[X:%.*]], ptr [[STR:%.*]]) {
121+
; CHECK-NEXT: [[ENTRY:.*:]]
122+
; CHECK-NEXT: [[MEMCHR:%.*]] = call ptr @memchr(ptr [[STR]], i32 [[X]], i64 5)
123+
; CHECK-NEXT: ret ptr [[MEMCHR]]
124+
;
125+
entry:
126+
%memchr = call ptr @memchr(ptr %str, i32 %x, i64 5)
127+
ret ptr %memchr
128+
}
129+
130+
; Negative test: the searched character is a constant (49), which this
; expansion does not handle, so the call is expected to be left unchanged.
define ptr @test_memchr_constant_ch() {
131+
; CHECK-LABEL: define ptr @test_memchr_constant_ch() {
132+
; CHECK-NEXT: [[ENTRY:.*:]]
133+
; CHECK-NEXT: [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 49, i64 5)
134+
; CHECK-NEXT: ret ptr [[MEMCHR]]
135+
;
136+
entry:
137+
%memchr = call ptr @memchr(ptr @str, i32 49, i64 5)
138+
ret ptr %memchr
139+
}
140+
141+
; Negative test: the length is not a compile-time constant, so the call is
; expected to be left unchanged. The length is an i64 so that the call matches
; the `declare ptr @memchr(ptr, i32, i64)` prototype; with a mismatching
; length type the call would not have the declared signature at all and the
; non-constant-length check would never be exercised.
define ptr @test_memchr_dynamic_n(i32 %x, i64 %y) {
; CHECK-LABEL: define ptr @test_memchr_dynamic_n(
; CHECK-SAME: i32 [[X:%.*]], i64 [[Y:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i64 [[Y]])
; CHECK-NEXT:    ret ptr [[MEMCHR]]
;
entry:
  %memchr = call ptr @memchr(ptr @str, i32 %x, i64 %y)
  ret ptr %memchr
}
152+
153+
; Negative test: the length (8, all of @str_long) is above the
; --memchr-inline-threshold=5 given on the RUN line, so the call is expected
; to be left unchanged.
define ptr @test_memchr_long(i32 %x) {
154+
; CHECK-LABEL: define ptr @test_memchr_long(
155+
; CHECK-SAME: i32 [[X:%.*]]) {
156+
; CHECK-NEXT: [[ENTRY:.*:]]
157+
; CHECK-NEXT: [[MEMCHR:%.*]] = call ptr @memchr(ptr @str_long, i32 [[X]], i64 8)
158+
; CHECK-NEXT: ret ptr [[MEMCHR]]
159+
;
160+
entry:
161+
%memchr = call ptr @memchr(ptr @str_long, i32 %x, i64 8)
162+
ret ptr %memchr
163+
}

0 commit comments

Comments
 (0)