Commit a07639f

[AArch64] Increase inline memmove limit to 16 stored registers (#111848)
The memcpy inline limit has been 16 for a long time; this patch makes the memmove inline limit the same, allowing small constant-sized memmoves to be emitted inline. The 16 is the number of registers stored, which equates to a limit of 256 bytes.
1 parent 5bf81e5 commit a07639f
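
To make the new threshold concrete, here is a minimal IR sketch mirroring the tests added in this commit (the function names below are hypothetical): a constant-size memmove of up to 256 bytes, i.e. at most 16 stored 16-byte q registers, can now be expanded inline, while anything larger still lowers to a libcall.

; Illustrative IR only. With MaxStoresPerMemmove = 16, the 256-byte memmove
; below is eligible for inline expansion into load/store pairs on AArch64.
define void @move256(ptr %dst, ptr %src) {
entry:
  call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 256, i1 false)
  ret void
}

; One byte over the 16-register limit, so this remains a call to memmove.
define void @move257(ptr %dst, ptr %src) {
entry:
  call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 257, i1 false)
  ret void
}

declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)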

File tree

3 files changed: +226 -68 lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
@@ -1146,7 +1146,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
 
   MaxStoresPerMemmoveOptSize = 4;
-  MaxStoresPerMemmove = 4;
+  MaxStoresPerMemmove =
+      Subtarget->requiresStrictAlign() ? MaxStoresPerMemmoveOptSize : 16;
 
   MaxLoadsPerMemcmpOptSize = 4;
   MaxLoadsPerMemcmp =
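
The requiresStrictAlign() guard keeps the conservative OptSize limit when the subtarget forbids unaligned accesses; in that configuration only memmoves whose pointers carry enough alignment are worth expanding, which the new llc test below exercises. A minimal IR sketch of the distinction (hypothetical function names; the behaviour mirrors the t16/t16_aligned tests in this commit):

; Under -mattr=+strict-align, the unaligned 16-byte memmove stays a libcall,
; while the 8-byte-aligned variant can be expanded into loads and stores.
define void @unaligned16(ptr %out, ptr %in) {
entry:
  call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
  ret void
}

define void @aligned16(ptr align 8 %out, ptr align 8 %in) {
entry:
  call void @llvm.memmove.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
  ret void
}

declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)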

llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir

Lines changed: 102 additions & 67 deletions
@@ -61,11 +61,12 @@ body: |
 
     ; CHECK-LABEL: name: test_memmove1
    ; CHECK: liveins: $x0, $x1, $x2
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-    ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = COPY $x2
@@ -83,23 +84,24 @@ body: |
 
     ; CHECK-LABEL: name: test_memmove2_const
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
-    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
+    ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p0) :: (store (s128) into %ir.0 + 16, align 4)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 32, align 4)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 48
@@ -117,11 +119,42 @@ body: |
 
     ; CHECK-LABEL: name: test_memmove3_const_toolarge
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
-    ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load (s128) from %ir.1 + 48, align 4)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD3]](p0) :: (load (s128) from %ir.1 + 64, align 4)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+    ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load (s128) from %ir.1 + 80, align 4)
+    ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD5]](p0) :: (store (s128) into %ir.0 + 16, align 4)
+    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD6]](p0) :: (store (s128) into %ir.0 + 32, align 4)
+    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD3]](s128), [[PTR_ADD7]](p0) :: (store (s128) into %ir.0 + 48, align 4)
+    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD4]](s128), [[PTR_ADD8]](p0) :: (store (s128) into %ir.0 + 64, align 4)
+    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store (s128) into %ir.0 + 80, align 4)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 96
@@ -139,29 +172,30 @@ body: |
 
     ; CHECK-LABEL: name: test_memmove4_const_unaligned
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48)
-    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4)
-    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4)
-    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.1, align 4)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load (s128) from %ir.1 + 16, align 4)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p0) :: (load (s128) from %ir.1 + 32, align 4)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.1 + 48)
+    ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store (s128) into %ir.0, align 4)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD3]](p0) :: (store (s128) into %ir.0 + 16, align 4)
+    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD4]](p0) :: (store (s128) into %ir.0 + 32, align 4)
+    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD5]](p0) :: (store (s32) into %ir.0 + 48)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 52
@@ -179,23 +213,24 @@ body: |
 
     ; CHECK-LABEL: name: test_memmove_addrspace
     ; CHECK: liveins: $x0, $x1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
-    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
-    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
-    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
-    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
-    ; CHECK: RET_ReallyLR
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
+    ; CHECK-NEXT: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
+    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK-NEXT: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
+    ; CHECK-NEXT: RET_ReallyLR
     %0:_(p1) = COPY $x0
     %1:_(p2) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 48

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ALIGNED
+; RUN: llc -mtriple=aarch64 -mattr=+strict-align < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNALIGNED
+
+; Small (16 bytes here) unaligned memmove() should be a function call if
+; strict-alignment is turned on.
+define void @t16(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: t16:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldr q0, [x1]
+; CHECK-ALIGNED-NEXT: str q0, [x0]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t16:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-UNALIGNED-NEXT: .cfi_def_cfa_offset 16
+; CHECK-UNALIGNED-NEXT: .cfi_offset w30, -16
+; CHECK-UNALIGNED-NEXT: mov w2, #16 // =0x10
+; CHECK-UNALIGNED-NEXT: bl memmove
+; CHECK-UNALIGNED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+  call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
+  ret void
+}
+
+; Small (16 bytes here) aligned memmove() should be inlined even if
+; strict-alignment is turned on.
+define void @t16_aligned(ptr align 8 %out, ptr align 8 %in) {
+; CHECK-ALIGNED-LABEL: t16_aligned:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldr q0, [x1]
+; CHECK-ALIGNED-NEXT: str q0, [x0]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t16_aligned:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: ldp x9, x8, [x1]
+; CHECK-UNALIGNED-NEXT: stp x9, x8, [x0]
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+  call void @llvm.memmove.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
+  ret void
+}
+
+; Tiny (4 bytes here) unaligned memmove() should be inlined with byte sized
+; loads and stores if strict-alignment is turned on.
+define void @t4(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: t4:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldr w8, [x1]
+; CHECK-ALIGNED-NEXT: str w8, [x0]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t4:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: ldrb w8, [x1, #3]
+; CHECK-UNALIGNED-NEXT: ldrb w9, [x1, #2]
+; CHECK-UNALIGNED-NEXT: ldrb w10, [x1]
+; CHECK-UNALIGNED-NEXT: ldrb w11, [x1, #1]
+; CHECK-UNALIGNED-NEXT: strb w8, [x0, #3]
+; CHECK-UNALIGNED-NEXT: strb w9, [x0, #2]
+; CHECK-UNALIGNED-NEXT: strb w11, [x0, #1]
+; CHECK-UNALIGNED-NEXT: strb w10, [x0]
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+  call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
+  ret void
+}
+
+define void @t256(ptr %out, ptr %in) {
+; CHECK-ALIGNED-LABEL: t256:
+; CHECK-ALIGNED: // %bb.0: // %entry
+; CHECK-ALIGNED-NEXT: ldp q0, q1, [x1]
+; CHECK-ALIGNED-NEXT: ldp q2, q3, [x1, #32]
+; CHECK-ALIGNED-NEXT: ldp q4, q5, [x1, #64]
+; CHECK-ALIGNED-NEXT: ldp q6, q7, [x1, #96]
+; CHECK-ALIGNED-NEXT: ldp q16, q17, [x1, #224]
+; CHECK-ALIGNED-NEXT: ldp q18, q19, [x1, #128]
+; CHECK-ALIGNED-NEXT: ldp q20, q21, [x1, #160]
+; CHECK-ALIGNED-NEXT: ldp q22, q23, [x1, #192]
+; CHECK-ALIGNED-NEXT: stp q0, q1, [x0]
+; CHECK-ALIGNED-NEXT: stp q2, q3, [x0, #32]
+; CHECK-ALIGNED-NEXT: stp q4, q5, [x0, #64]
+; CHECK-ALIGNED-NEXT: stp q6, q7, [x0, #96]
+; CHECK-ALIGNED-NEXT: stp q18, q19, [x0, #128]
+; CHECK-ALIGNED-NEXT: stp q20, q21, [x0, #160]
+; CHECK-ALIGNED-NEXT: stp q22, q23, [x0, #192]
+; CHECK-ALIGNED-NEXT: stp q16, q17, [x0, #224]
+; CHECK-ALIGNED-NEXT: ret
+;
+; CHECK-UNALIGNED-LABEL: t256:
+; CHECK-UNALIGNED: // %bb.0: // %entry
+; CHECK-UNALIGNED-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-UNALIGNED-NEXT: .cfi_def_cfa_offset 16
+; CHECK-UNALIGNED-NEXT: .cfi_offset w30, -16
+; CHECK-UNALIGNED-NEXT: mov w2, #256 // =0x100
+; CHECK-UNALIGNED-NEXT: bl memmove
+; CHECK-UNALIGNED-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-UNALIGNED-NEXT: ret
+entry:
+  call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 256, i1 false)
+  ret void
+}
+
+define void @t257(ptr %out, ptr %in) {
+; CHECK-LABEL: t257:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w2, #257 // =0x101
+; CHECK-NEXT: bl memmove
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.memmove.p0.p0.i64(ptr %out, ptr %in, i64 257, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
