Skip to content

Commit 7d62e48

Browse files
committed
[GlobalISel] Enable memcpy inlining with optsize.
We should be disabling inlining for minsize, not optsize. llvm-svn: 373143
1 parent f7a428e commit 7d62e48

File tree

3 files changed

+94
-2
lines changed

3 files changed

+94
-2
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class CombinerInfo {
3131
bool MinSize)
3232
: IllegalOpsAllowed(AllowIllegalOps),
3333
LegalizeIllegalOps(ShouldLegalizeIllegal), LInfo(LInfo),
34-
EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(OptSize) {
34+
EnableOpt(OptEnabled), EnableOptSize(OptSize), EnableMinSize(MinSize) {
3535
assert(((AllowIllegalOps || !LegalizeIllegalOps) || LInfo) &&
3636
"Expecting legalizerInfo when illegalops not allowed");
3737
}

llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
7373
// heuristics decide.
7474
unsigned MaxLen = EnableOpt ? 0 : 32;
7575
// Try to inline memcpy type calls if optimizations are enabled.
76-
return (!EnableOptSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen)
76+
return (!EnableMinSize) ? Helper.tryCombineMemCpyFamily(MI, MaxLen)
7777
: false;
7878
}
7979
default:

llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@
2222
ret void
2323
}
2424

25+
define void @test_memcpy2_const_optsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #2 {
26+
entry:
27+
%0 = bitcast i32* %dst to i8*
28+
%1 = bitcast i32* %src to i8*
29+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
30+
ret void
31+
}
32+
33+
define void @test_memcpy2_const_minsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #3 {
34+
entry:
35+
%0 = bitcast i32* %dst to i8*
36+
%1 = bitcast i32* %src to i8*
37+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
38+
ret void
39+
}
40+
2541
define void @test_memcpy3_const_arrays_unaligned(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
2642
entry:
2743
%0 = bitcast i32* %dst to i8*
@@ -32,6 +48,8 @@
3248

3349
attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" }
3450
attributes #1 = { argmemonly nounwind }
51+
attributes #2 = { optsize }
52+
attributes #3 = { minsize }
3553

3654
...
3755
---
@@ -107,6 +125,80 @@ body: |
107125
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
108126
RET_ReallyLR
109127
128+
...
129+
---
130+
name: test_memcpy2_const_optsize
131+
alignment: 4
132+
tracksRegLiveness: true
133+
registers:
134+
- { id: 0, class: _ }
135+
- { id: 1, class: _ }
136+
- { id: 2, class: _ }
137+
machineFunctionInfo: {}
138+
body: |
139+
bb.1.entry:
140+
liveins: $x0, $x1
141+
142+
; CHECK-LABEL: name: test_memcpy2_const_optsize
143+
; CHECK: liveins: $x0, $x1
144+
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
145+
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
146+
; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
147+
; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
148+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
149+
; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C]](s64)
150+
; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
151+
; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64)
152+
; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
153+
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
154+
; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C1]](s64)
155+
; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
156+
; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64)
157+
; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
158+
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
159+
; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C2]](s64)
160+
; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
161+
; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C2]](s64)
162+
; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
163+
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
164+
; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C3]](s64)
165+
; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4)
166+
; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C3]](s64)
167+
; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4)
168+
; CHECK: RET_ReallyLR
169+
%0:_(p0) = COPY $x0
170+
%1:_(p0) = COPY $x1
171+
%2:_(s64) = G_CONSTANT i64 72
172+
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
173+
RET_ReallyLR
174+
175+
...
176+
---
177+
name: test_memcpy2_const_minsize
178+
alignment: 4
179+
tracksRegLiveness: true
180+
registers:
181+
- { id: 0, class: _ }
182+
- { id: 1, class: _ }
183+
- { id: 2, class: _ }
184+
machineFunctionInfo: {}
185+
body: |
186+
bb.1.entry:
187+
liveins: $x0, $x1
188+
189+
; CHECK-LABEL: name: test_memcpy2_const_minsize
190+
; CHECK: liveins: $x0, $x1
191+
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
192+
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
193+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72
194+
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
195+
; CHECK: RET_ReallyLR
196+
%0:_(p0) = COPY $x0
197+
%1:_(p0) = COPY $x1
198+
%2:_(s64) = G_CONSTANT i64 72
199+
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.memcpy), %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
200+
RET_ReallyLR
201+
110202
...
111203
---
112204
name: test_memcpy3_const_arrays_unaligned

0 commit comments

Comments
 (0)