Skip to content

Commit 326a615

Browse files
committed
[X86] replace-load-and-with-bzhi.ll - add commuted test cases to show failure to fold
Tests showing combineAndLoadToBZHI commutation folding is currently broken
1 parent 75bcf57 commit 326a615

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

llvm/test/CodeGen/X86/replace-load-and-with-bzhi.ll

Lines changed: 87 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,28 @@ entry:
2626
ret i32 %and
2727
}
2828

29+
; Commuted operands ('and %x, load' instead of 'and load, %x'): shows that
; combineAndLoadToBZHI currently fails to fold this into BZHI.
define i32 @f32_bzhi_commute(i32 %x, i32 %y) local_unnamed_addr {
; X64-LABEL: f32_bzhi_commute:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movslq %esi, %rcx
; X64-NEXT:    andl fill_table32(,%rcx,4), %eax
; X64-NEXT:    retq
;
; X86-LABEL: f32_bzhi_commute:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl fill_table32(,%ecx,4), %eax
; X86-NEXT:    retl
entry:
  %idxprom = sext i32 %y to i64
  %arrayidx = getelementptr inbounds [32 x i32], ptr @fill_table32, i64 0, i64 %idxprom
  %0 = load i32, ptr %arrayidx, align 4
  %and = and i32 %x, %0
  ret i32 %and
}
50+
2951
define i32 @f32_bzhi_partial(i32 %x, i32 %y) local_unnamed_addr {
3052
; X64-LABEL: f32_bzhi_partial:
3153
; X64: # %bb.0: # %entry
@@ -45,6 +67,28 @@ entry:
4567
ret i32 %and
4668
}
4769

70+
; Commuted variant over a partial (17-entry) table: combineAndLoadToBZHI
; currently fails to fold when the AND operands are swapped.
define i32 @f32_bzhi_partial_commute(i32 %x, i32 %y) local_unnamed_addr {
; X64-LABEL: f32_bzhi_partial_commute:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movslq %esi, %rcx
; X64-NEXT:    andl fill_table32_partial(,%rcx,4), %eax
; X64-NEXT:    retq
;
; X86-LABEL: f32_bzhi_partial_commute:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl fill_table32_partial(,%ecx,4), %eax
; X86-NEXT:    retl
entry:
  %idxprom = sext i32 %y to i64
  %arrayidx = getelementptr inbounds [17 x i32], ptr @fill_table32_partial, i64 0, i64 %idxprom
  %0 = load i32, ptr %arrayidx, align 4
  %and = and i32 %x, %0
  ret i32 %and
}
91+
4892
define i64 @f64_bzhi(i64 %x, i64 %y) local_unnamed_addr {
4993
; X64-LABEL: f64_bzhi:
5094
; X64: # %bb.0: # %entry
@@ -66,6 +110,28 @@ entry:
66110
ret i64 %and
67111
}
68112

113+
; 64-bit commuted variant ('and %x, load'): combineAndLoadToBZHI currently
; fails to fold this into BZHI64.
define i64 @f64_bzhi_commute(i64 %x, i64 %y) local_unnamed_addr {
; X64-LABEL: f64_bzhi_commute:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    andq fill_table64(,%rsi,8), %rax
; X64-NEXT:    retq
;
; X86-LABEL: f64_bzhi_commute:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl fill_table64(,%ecx,8), %eax
; X86-NEXT:    andl fill_table64+4(,%ecx,8), %edx
; X86-NEXT:    retl
entry:
  %arrayidx = getelementptr inbounds [64 x i64], ptr @fill_table64, i64 0, i64 %y
  %0 = load i64, ptr %arrayidx, align 8
  %and = and i64 %x, %0
  ret i64 %and
}
134+
69135
define i64 @f64_bzhi_partial(i64 %x, i64 %y) local_unnamed_addr {
70136
; X64-LABEL: f64_bzhi_partial:
71137
; X64: # %bb.0: # %entry
@@ -87,3 +153,24 @@ entry:
87153
ret i64 %and
88154
}
89155

156+
; 64-bit commuted variant over a partial (51-entry) table: commutation
; folding in combineAndLoadToBZHI is currently missed here as well.
define i64 @f64_bzhi_partial_commute(i64 %x, i64 %y) local_unnamed_addr {
; X64-LABEL: f64_bzhi_partial_commute:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    andq fill_table64_partial(,%rsi,8), %rax
; X64-NEXT:    retq
;
; X86-LABEL: f64_bzhi_partial_commute:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl fill_table64_partial(,%ecx,8), %eax
; X86-NEXT:    andl fill_table64_partial+4(,%ecx,8), %edx
; X86-NEXT:    retl
entry:
  %arrayidx = getelementptr inbounds [51 x i64], ptr @fill_table64_partial, i64 0, i64 %y
  %0 = load i64, ptr %arrayidx, align 8
  %and = and i64 %x, %0
  ret i64 %and
}

0 commit comments

Comments
 (0)