Skip to content

Commit 5898979

Browse files
committed
BPF: support inlining __builtin_memcmp intrinsic call
Delyan Kratunov reported an issue where __builtin_memcmp is not inlined into simple load/compare instructions. This is a known issue. Currently, __builtin_memcmp is converted to a memcmp call, which does not work for BPF programs. This patch adds support for expanding __builtin_memcmp into actual loads and compares, up to a current maximum of 128 total loads. The implementation is identical to PowerPC's. Differential Revision: https://reviews.llvm.org/D122676
1 parent 4d10109 commit 5898979

File tree

3 files changed

+83
-0
lines changed

3 files changed

+83
-0
lines changed

llvm/lib/Target/BPF/BPFISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
168168
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
169169
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
170170
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
171+
MaxLoadsPerMemcmp = 0;
171172
} else {
172173
// inline memcpy() for kernel to see explicit copy
173174
unsigned CommonMaxStores =
@@ -176,6 +177,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
176177
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
177178
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
178179
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
180+
MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
179181
}
180182

181183
// CPU/Feature control

llvm/lib/Target/BPF/BPFTargetTransformInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,15 @@ class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
7171
Opd2Info, Opd1PropInfo,
7272
Opd2PropInfo);
7373
}
74+
75+
/// Builds the options that describe how memcmp calls may be expanded inline
/// for the BPF target.
///
/// The candidate load sizes are set to {8, 4, 2, 1} (presumably bytes, largest
/// first -- confirm against the TTI::MemCmpExpansionOptions contract), and the
/// maximum number of loads is whatever TLI->getMaxExpandSizeMemcmp() reports
/// for the given \p OptSize setting.
///
/// \param OptSize   forwarded to getMaxExpandSizeMemcmp() to pick the limit.
/// \param IsZeroCmp not consulted by this implementation.
/// \returns the populated MemCmpExpansionOptions; all other fields keep their
///          default-constructed values.
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
76+
bool IsZeroCmp) const {
77+
TTI::MemCmpExpansionOptions Options;
78+
Options.LoadSizes = {8, 4, 2, 1};
79+
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
80+
return Options;
81+
}
82+
7483
};
7584

7685
} // end namespace llvm

llvm/test/CodeGen/BPF/memcmp.ll

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; RUN: llc -march=bpfel < %s | FileCheck %s
2+
; RUN: llc -march=bpfel -mcpu=v3 < %s | FileCheck %s
3+
;
4+
; Source code:
5+
; /* set aligned 4 to minimize the number of loads */
6+
; struct build_id {
7+
; unsigned char id[20];
8+
; } __attribute__((aligned(4)));
9+
;
10+
; /* try to compute a local build_id */
11+
; void bar1(void *);
12+
;
13+
; /* the global build_id to compare */
14+
; struct build_id id2;
15+
;
16+
; int foo()
17+
; {
18+
; struct build_id id1;
19+
;
20+
; bar1(&id1);
21+
; return __builtin_memcmp(&id1, &id2, sizeof(id1)) == 0;
22+
; }
23+
; Compilation flags:
24+
; clang -target bpf -S -O2 t.c -emit-llvm
25+
26+
27+
%struct.build_id = type { [20 x i8] }
28+
29+
@id2 = dso_local global %struct.build_id zeroinitializer, align 4
30+
31+
; Function Attrs: nounwind
32+
; @foo: allocates a 20-byte, 4-aligned stack buffer, passes it to @bar1, then
; calls memcmp on that buffer and the global @id2 over 20 bytes. Returns 1
; (zext of the i1 compare) when memcmp yields 0, and 0 otherwise.
; The memcmp call below is the one the CHECK lines expect to see replaced by
; inline u32 loads.
define dso_local i32 @foo() local_unnamed_addr #0 {
33+
entry:
34+
%id11 = alloca [20 x i8], align 4
35+
%id11.sub = getelementptr inbounds [20 x i8], [20 x i8]* %id11, i64 0, i64 0
36+
call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %id11.sub) #4
37+
call void @bar1(i8* noundef nonnull %id11.sub) #4
38+
%call = call i32 @memcmp(i8* noundef nonnull dereferenceable(20) %id11.sub, i8* noundef nonnull dereferenceable(20) getelementptr inbounds (%struct.build_id, %struct.build_id* @id2, i64 0, i32 0, i64 0), i64 noundef 20) #4
39+
%cmp = icmp eq i32 %call, 0
40+
%conv = zext i1 %cmp to i32
41+
call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %id11.sub) #4
42+
ret i32 %conv
43+
}
44+
45+
; CHECK: *(u32 *)(r1 + 0)
46+
; CHECK: *(u32 *)(r10 - 20)
47+
; CHECK: *(u32 *)(r10 - 12)
48+
; CHECK: *(u32 *)(r1 + 8)
49+
50+
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
51+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
52+
53+
declare dso_local void @bar1(i8* noundef) local_unnamed_addr #2
54+
55+
; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn
56+
declare dso_local i32 @memcmp(i8* nocapture noundef, i8* nocapture noundef, i64 noundef) local_unnamed_addr #3
57+
58+
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
59+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
60+
61+
attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
62+
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
63+
attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
64+
attributes #3 = { argmemonly mustprogress nofree nounwind readonly willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
65+
attributes #4 = { nounwind }
66+
67+
!llvm.module.flags = !{!0, !1}
68+
!llvm.ident = !{!2}
69+
70+
!0 = !{i32 1, !"wchar_size", i32 4}
71+
!1 = !{i32 7, !"frame-pointer", i32 2}
72+
!2 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git dea65874b2505f8f5e8e51fd8cad6908feb375ec)"}

0 commit comments

Comments
 (0)