Skip to content

Commit 9ff27d5

Browse files
committed
[X86] Add additional avgfloor 'add(shift(x,1),shift(y,1),and(x,y,1))' patterns
Add a mixture of pattern combinations to match against the SDPatternMatch::m_Reassociatable* matchers
1 parent 6defc8e commit 9ff27d5

File tree

2 files changed

+302
-0
lines changed

2 files changed

+302
-0
lines changed

llvm/test/CodeGen/X86/avgfloors-scalar.ll

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
;
66
; fixed avg(x,y) = add(and(x,y),ashr(xor(x,y),1))
77
;
8+
; lsb avg(x,y) = add(ashr(x,1),ashr(y,1),and(x,y,1))
9+
;
810
; ext avg(x,y) = trunc(ashr(add(sext(x),sext(y)),1))
911
;
1012

@@ -33,6 +35,39 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
3335
ret i8 %res
3436
}
3537

38+
; Signed 'lsb' avgfloor form at i8:
;   add(add(ashr(a0,1), ashr(a1,1)), and(and(a0,1), a1))
; Operand-order variant: the low-bit mask is applied to %a0 first and the result
; is then anded with %a1; the masked term is the last addend.
; NOTE(review): the CHECK lines look autogenerated - regenerate them rather than
; hand-editing if the IR below changes (confirm against the file header).
define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
39+
; X86-LABEL: test_lsb_i8:
40+
; X86: # %bb.0:
41+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
42+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
43+
; X86-NEXT: movl %eax, %edx
44+
; X86-NEXT: sarb %dl
45+
; X86-NEXT: andb %cl, %al
46+
; X86-NEXT: sarb %cl
47+
; X86-NEXT: addb %dl, %cl
48+
; X86-NEXT: andb $1, %al
49+
; X86-NEXT: addb %cl, %al
50+
; X86-NEXT: retl
51+
;
52+
; X64-LABEL: test_lsb_i8:
53+
; X64: # %bb.0:
54+
; X64-NEXT: movl %edi, %eax
55+
; X64-NEXT: sarb %al
56+
; X64-NEXT: andb %sil, %dil
57+
; X64-NEXT: sarb %sil
58+
; X64-NEXT: addb %sil, %al
59+
; X64-NEXT: andb $1, %dil
60+
; X64-NEXT: addb %dil, %al
61+
; X64-NEXT: retq
62+
%s0 = ashr i8 %a0, 1
63+
%s1 = ashr i8 %a1, 1
64+
%m0 = and i8 %a0, 1
65+
%m1 = and i8 %m0, %a1
66+
%r0 = add i8 %s0, %s1
67+
%r1 = add i8 %r0, %m1
68+
ret i8 %r1
69+
}
70+
3671
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
3772
; X86-LABEL: test_ext_i8:
3873
; X86: # %bb.0:
@@ -84,6 +119,42 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
84119
ret i16 %res
85120
}
86121

122+
; Signed 'lsb' avgfloor form at i16:
;   add(add(ashr(a0,1), ashr(a1,1)), and(and(a0,a1), 1))
; Operand-order variant: %a0 and %a1 are anded together first and the low bit is
; masked afterwards; the masked term is the last addend.
define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
123+
; X86-LABEL: test_lsb_i16:
124+
; X86: # %bb.0:
125+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
126+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
127+
; X86-NEXT: movl %eax, %edx
128+
; X86-NEXT: sarl %edx
129+
; X86-NEXT: andl %ecx, %eax
130+
; X86-NEXT: sarl %ecx
131+
; X86-NEXT: addl %edx, %ecx
132+
; X86-NEXT: andl $1, %eax
133+
; X86-NEXT: addl %ecx, %eax
134+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
135+
; X86-NEXT: retl
136+
;
137+
; X64-LABEL: test_lsb_i16:
138+
; X64: # %bb.0:
139+
; X64-NEXT: movswl %si, %eax
140+
; X64-NEXT: movswl %di, %ecx
141+
; X64-NEXT: sarl %ecx
142+
; X64-NEXT: sarl %eax
143+
; X64-NEXT: addl %ecx, %eax
144+
; X64-NEXT: andl %esi, %edi
145+
; X64-NEXT: andl $1, %edi
146+
; X64-NEXT: addl %edi, %eax
147+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
148+
; X64-NEXT: retq
149+
%s0 = ashr i16 %a0, 1
150+
%s1 = ashr i16 %a1, 1
151+
%m0 = and i16 %a0, %a1
152+
%m1 = and i16 %m0, 1
153+
%r0 = add i16 %s0, %s1
154+
%r1 = add i16 %r0, %m1
155+
ret i16 %r1
156+
}
157+
87158
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
88159
; X86-LABEL: test_ext_i16:
89160
; X86: # %bb.0:
@@ -137,6 +208,39 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
137208
ret i32 %res
138209
}
139210

211+
; Signed 'lsb' avgfloor form at i32:
;   add(add(ashr(a0,1), and(and(a0,a1), 1)), ashr(a1,1))
; Operand-order variant: the masked low-bit term is added to the first shifted
; operand, and the second shifted operand is the last addend.
define i32 @test_lsb_i32(i32 %a0, i32 %a1) nounwind {
212+
; X86-LABEL: test_lsb_i32:
213+
; X86: # %bb.0:
214+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
215+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
216+
; X86-NEXT: movl %eax, %edx
217+
; X86-NEXT: sarl %edx
218+
; X86-NEXT: andl %ecx, %eax
219+
; X86-NEXT: sarl %ecx
220+
; X86-NEXT: addl %edx, %ecx
221+
; X86-NEXT: andl $1, %eax
222+
; X86-NEXT: addl %ecx, %eax
223+
; X86-NEXT: retl
224+
;
225+
; X64-LABEL: test_lsb_i32:
226+
; X64: # %bb.0:
227+
; X64-NEXT: movl %edi, %eax
228+
; X64-NEXT: sarl %eax
229+
; X64-NEXT: andl %esi, %edi
230+
; X64-NEXT: sarl %esi
231+
; X64-NEXT: addl %esi, %eax
232+
; X64-NEXT: andl $1, %edi
233+
; X64-NEXT: addl %edi, %eax
234+
; X64-NEXT: retq
235+
%s0 = ashr i32 %a0, 1
236+
%s1 = ashr i32 %a1, 1
237+
%m0 = and i32 %a0, %a1
238+
%m1 = and i32 %m0, 1
239+
%r0 = add i32 %s0, %m1
240+
%r1 = add i32 %r0, %s1
241+
ret i32 %r1
242+
}
243+
140244
define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
141245
; X86-LABEL: test_ext_i32:
142246
; X86: # %bb.0:
@@ -205,6 +309,53 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
205309
ret i64 %res
206310
}
207311

312+
; Signed 'lsb' avgfloor form at i64:
;   add(add(ashr(a1,1), ashr(a0,1)), and(and(a0,1), a1))
; Operand-order variant: the shifted operands are summed in swapped order
; (%s1 + %s0), and the low-bit mask is applied to %a0 before anding with %a1.
define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
313+
; X86-LABEL: test_lsb_i64:
314+
; X86: # %bb.0:
315+
; X86-NEXT: pushl %ebx
316+
; X86-NEXT: pushl %edi
317+
; X86-NEXT: pushl %esi
318+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
319+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
320+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
321+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
322+
; X86-NEXT: movl %edi, %ebx
323+
; X86-NEXT: sarl %ebx
324+
; X86-NEXT: shldl $31, %eax, %edi
325+
; X86-NEXT: movl %ecx, %edx
326+
; X86-NEXT: sarl %edx
327+
; X86-NEXT: shldl $31, %esi, %ecx
328+
; X86-NEXT: addl %edi, %ecx
329+
; X86-NEXT: adcl %ebx, %edx
330+
; X86-NEXT: andl %esi, %eax
331+
; X86-NEXT: andl $1, %eax
332+
; X86-NEXT: addl %ecx, %eax
333+
; X86-NEXT: adcl $0, %edx
334+
; X86-NEXT: popl %esi
335+
; X86-NEXT: popl %edi
336+
; X86-NEXT: popl %ebx
337+
; X86-NEXT: retl
338+
;
339+
; X64-LABEL: test_lsb_i64:
340+
; X64: # %bb.0:
341+
; X64-NEXT: movq %rdi, %rcx
342+
; X64-NEXT: sarq %rcx
343+
; X64-NEXT: andl %esi, %edi
344+
; X64-NEXT: movq %rsi, %rax
345+
; X64-NEXT: sarq %rax
346+
; X64-NEXT: addq %rcx, %rax
347+
; X64-NEXT: andl $1, %edi
348+
; X64-NEXT: addq %rdi, %rax
349+
; X64-NEXT: retq
350+
%s0 = ashr i64 %a0, 1
351+
%s1 = ashr i64 %a1, 1
352+
%s = add i64 %s1, %s0
353+
%m0 = and i64 %a0, 1
354+
%m1 = and i64 %m0, %a1
355+
%res = add i64 %s, %m1
356+
ret i64 %res
357+
}
358+
208359
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
209360
; X86-LABEL: test_ext_i64:
210361
; X86: # %bb.0:

llvm/test/CodeGen/X86/avgflooru-scalar.ll

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
;
66
; fixed avg(x,y) = add(and(x,y),lshr(xor(x,y),1))
77
;
8+
; lsb avg(x,y) = add(lshr(x,1),lshr(y,1),and(x,y,1))
9+
;
810
; ext avg(x,y) = trunc(lshr(add(zext(x),zext(y)),1))
911
;
1012

@@ -33,6 +35,39 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
3335
ret i8 %res
3436
}
3537

38+
; Unsigned 'lsb' avgfloor form at i8:
;   add(add(lshr(a0,1), lshr(a1,1)), and(and(a0,1), a1))
; Operand-order variant: the low-bit mask is applied to %a0 first and the result
; is then anded with %a1; the masked term is the last addend.
; NOTE(review): the CHECK lines look autogenerated - regenerate them rather than
; hand-editing if the IR below changes (confirm against the file header).
define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
39+
; X86-LABEL: test_lsb_i8:
40+
; X86: # %bb.0:
41+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
42+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
43+
; X86-NEXT: movl %eax, %edx
44+
; X86-NEXT: shrb %dl
45+
; X86-NEXT: andb %cl, %al
46+
; X86-NEXT: shrb %cl
47+
; X86-NEXT: addb %dl, %cl
48+
; X86-NEXT: andb $1, %al
49+
; X86-NEXT: addb %cl, %al
50+
; X86-NEXT: retl
51+
;
52+
; X64-LABEL: test_lsb_i8:
53+
; X64: # %bb.0:
54+
; X64-NEXT: movl %edi, %eax
55+
; X64-NEXT: shrb %al
56+
; X64-NEXT: andb %sil, %dil
57+
; X64-NEXT: shrb %sil
58+
; X64-NEXT: addb %sil, %al
59+
; X64-NEXT: andb $1, %dil
60+
; X64-NEXT: addb %dil, %al
61+
; X64-NEXT: retq
62+
%s0 = lshr i8 %a0, 1
63+
%s1 = lshr i8 %a1, 1
64+
%m0 = and i8 %a0, 1
65+
%m1 = and i8 %m0, %a1
66+
%r0 = add i8 %s0, %s1
67+
%r1 = add i8 %r0, %m1
68+
ret i8 %r1
69+
}
70+
3671
define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
3772
; X86-LABEL: test_ext_i8:
3873
; X86: # %bb.0:
@@ -84,6 +119,42 @@ define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
84119
ret i16 %res
85120
}
86121

122+
; Unsigned 'lsb' avgfloor form at i16:
;   add(add(lshr(a0,1), lshr(a1,1)), and(and(a0,a1), 1))
; Operand-order variant: %a0 and %a1 are anded together first and the low bit is
; masked afterwards; the masked term is the last addend.
define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
123+
; X86-LABEL: test_lsb_i16:
124+
; X86: # %bb.0:
125+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
126+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
127+
; X86-NEXT: movl %eax, %edx
128+
; X86-NEXT: shrl %edx
129+
; X86-NEXT: andl %ecx, %eax
130+
; X86-NEXT: shrl %ecx
131+
; X86-NEXT: addl %edx, %ecx
132+
; X86-NEXT: andl $1, %eax
133+
; X86-NEXT: addl %ecx, %eax
134+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
135+
; X86-NEXT: retl
136+
;
137+
; X64-LABEL: test_lsb_i16:
138+
; X64: # %bb.0:
139+
; X64-NEXT: movzwl %si, %eax
140+
; X64-NEXT: movzwl %di, %ecx
141+
; X64-NEXT: shrl %ecx
142+
; X64-NEXT: shrl %eax
143+
; X64-NEXT: addl %ecx, %eax
144+
; X64-NEXT: andl %esi, %edi
145+
; X64-NEXT: andl $1, %edi
146+
; X64-NEXT: addl %edi, %eax
147+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
148+
; X64-NEXT: retq
149+
%s0 = lshr i16 %a0, 1
150+
%s1 = lshr i16 %a1, 1
151+
%m0 = and i16 %a0, %a1
152+
%m1 = and i16 %m0, 1
153+
%r0 = add i16 %s0, %s1
154+
%r1 = add i16 %r0, %m1
155+
ret i16 %r1
156+
}
157+
87158
define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
88159
; X86-LABEL: test_ext_i16:
89160
; X86: # %bb.0:
@@ -137,6 +208,39 @@ define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
137208
ret i32 %res
138209
}
139210

211+
; Unsigned 'lsb' avgfloor form at i32:
;   add(add(lshr(a0,1), and(and(a0,a1), 1)), lshr(a1,1))
; Operand-order variant: the masked low-bit term is added to the first shifted
; operand, and the second shifted operand is the last addend.
define i32 @test_lsb_i32(i32 %a0, i32 %a1) nounwind {
212+
; X86-LABEL: test_lsb_i32:
213+
; X86: # %bb.0:
214+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
215+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
216+
; X86-NEXT: movl %eax, %edx
217+
; X86-NEXT: shrl %edx
218+
; X86-NEXT: andl %ecx, %eax
219+
; X86-NEXT: shrl %ecx
220+
; X86-NEXT: addl %edx, %ecx
221+
; X86-NEXT: andl $1, %eax
222+
; X86-NEXT: addl %ecx, %eax
223+
; X86-NEXT: retl
224+
;
225+
; X64-LABEL: test_lsb_i32:
226+
; X64: # %bb.0:
227+
; X64-NEXT: movl %edi, %eax
228+
; X64-NEXT: shrl %eax
229+
; X64-NEXT: andl %esi, %edi
230+
; X64-NEXT: shrl %esi
231+
; X64-NEXT: addl %esi, %eax
232+
; X64-NEXT: andl $1, %edi
233+
; X64-NEXT: addl %edi, %eax
234+
; X64-NEXT: retq
235+
%s0 = lshr i32 %a0, 1
236+
%s1 = lshr i32 %a1, 1
237+
%m0 = and i32 %a0, %a1
238+
%m1 = and i32 %m0, 1
239+
%r0 = add i32 %s0, %m1
240+
%r1 = add i32 %r0, %s1
241+
ret i32 %r1
242+
}
243+
140244
define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
141245
; X86-LABEL: test_ext_i32:
142246
; X86: # %bb.0:
@@ -193,6 +297,53 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
193297
ret i64 %res
194298
}
195299

300+
; Unsigned 'lsb' avgfloor form at i64:
;   add(add(lshr(a1,1), lshr(a0,1)), and(and(a0,1), a1))
; Operand-order variant: the shifted operands are summed in swapped order
; (%s1 + %s0), and the low-bit mask is applied to %a0 before anding with %a1.
define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
301+
; X86-LABEL: test_lsb_i64:
302+
; X86: # %bb.0:
303+
; X86-NEXT: pushl %ebx
304+
; X86-NEXT: pushl %edi
305+
; X86-NEXT: pushl %esi
306+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
307+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
308+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
309+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
310+
; X86-NEXT: movl %edi, %ebx
311+
; X86-NEXT: shrl %ebx
312+
; X86-NEXT: shldl $31, %eax, %edi
313+
; X86-NEXT: movl %ecx, %edx
314+
; X86-NEXT: shrl %edx
315+
; X86-NEXT: shldl $31, %esi, %ecx
316+
; X86-NEXT: addl %edi, %ecx
317+
; X86-NEXT: adcl %ebx, %edx
318+
; X86-NEXT: andl %esi, %eax
319+
; X86-NEXT: andl $1, %eax
320+
; X86-NEXT: addl %ecx, %eax
321+
; X86-NEXT: adcl $0, %edx
322+
; X86-NEXT: popl %esi
323+
; X86-NEXT: popl %edi
324+
; X86-NEXT: popl %ebx
325+
; X86-NEXT: retl
326+
;
327+
; X64-LABEL: test_lsb_i64:
328+
; X64: # %bb.0:
329+
; X64-NEXT: movq %rdi, %rcx
330+
; X64-NEXT: shrq %rcx
331+
; X64-NEXT: andl %esi, %edi
332+
; X64-NEXT: movq %rsi, %rax
333+
; X64-NEXT: shrq %rax
334+
; X64-NEXT: addq %rcx, %rax
335+
; X64-NEXT: andl $1, %edi
336+
; X64-NEXT: addq %rdi, %rax
337+
; X64-NEXT: retq
338+
%s0 = lshr i64 %a0, 1
339+
%s1 = lshr i64 %a1, 1
340+
%s = add i64 %s1, %s0
341+
%m0 = and i64 %a0, 1
342+
%m1 = and i64 %m0, %a1
343+
%res = add i64 %s, %m1
344+
ret i64 %res
345+
}
346+
196347
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
197348
; X86-LABEL: test_ext_i64:
198349
; X86: # %bb.0:

0 commit comments

Comments
 (0)