Skip to content

Commit f5b1892

Browse files
committed
[AArch64][x86] add tests for trunc disguised as vector ops (PR39016); NFC
These correspond to the IR transform from D52439.
llvm-svn: 344353
1 parent 0a3bb81 commit f5b1892

File tree

2 files changed

+172
-1
lines changed

2 files changed

+172
-1
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64_be-- < %s | FileCheck %s --check-prefix=BE
3+
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s --check-prefix=LE
4+
5+
; Insert %x into lane 0 of an undef <2 x i64>, reinterpret the vector as
; <4 x i32>, and return lane 0. Per the commit title this is a truncate
; disguised as vector ops.
; NOTE(review): the "_le" suffix presumably marks lane 0 as the low half of %x
; under little-endian lane numbering -- confirm against D52439.
; The autogenerated checks pin the current llc output for both run lines: each
; still moves %x through a vector register, and the aarch64_be run adds a rev64.
define i32 @trunc_i64_to_i32_le(i64 %x) {
6+
; BE-LABEL: trunc_i64_to_i32_le:
7+
; BE: // %bb.0:
8+
; BE-NEXT: fmov d0, x0
9+
; BE-NEXT: rev64 v0.4s, v0.4s
10+
; BE-NEXT: fmov w0, s0
11+
; BE-NEXT: ret
12+
;
13+
; LE-LABEL: trunc_i64_to_i32_le:
14+
; LE: // %bb.0:
15+
; LE-NEXT: fmov d0, x0
16+
; LE-NEXT: fmov w0, s0
17+
; LE-NEXT: ret
18+
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
19+
%bc = bitcast <2 x i64> %ins to <4 x i32>
20+
%ext = extractelement <4 x i32> %bc, i32 0
21+
ret i32 %ext
22+
}
23+
24+
; Insert %x into lane 0 of an undef <2 x i64>, reinterpret the vector as
; <4 x i32>, and return lane 1 (the other 32-bit half of the inserted value).
; NOTE(review): the "_be" suffix presumably marks lane 1 as the low half of %x
; under big-endian lane numbering -- confirm against D52439.
; The autogenerated checks pin the current llc output for both run lines: each
; extracts v0.s[1] from a vector register, and the aarch64_be run adds a rev64.
define i32 @trunc_i64_to_i32_be(i64 %x) {
25+
; BE-LABEL: trunc_i64_to_i32_be:
26+
; BE: // %bb.0:
27+
; BE-NEXT: fmov d0, x0
28+
; BE-NEXT: rev64 v0.4s, v0.4s
29+
; BE-NEXT: mov w0, v0.s[1]
30+
; BE-NEXT: ret
31+
;
32+
; LE-LABEL: trunc_i64_to_i32_be:
33+
; LE: // %bb.0:
34+
; LE-NEXT: fmov d0, x0
35+
; LE-NEXT: mov w0, v0.s[1]
36+
; LE-NEXT: ret
37+
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
38+
%bc = bitcast <2 x i64> %ins to <4 x i32>
39+
%ext = extractelement <4 x i32> %bc, i32 1
40+
ret i32 %ext
41+
}
42+
43+
; Insert %x into lane 0 of an undef <2 x i64>, reinterpret the vector as
; <8 x i16>, and return lane 0. Per the commit title this is a truncate
; disguised as vector ops.
; NOTE(review): the "_le" suffix presumably marks lane 0 as the low 16 bits of
; %x under little-endian lane numbering -- confirm against D52439.
; The autogenerated checks pin the current llc output for both run lines: each
; extracts v0.h[0] with umov, and the aarch64_be run adds a rev64.
define i16 @trunc_i64_to_i16_le(i64 %x) {
44+
; BE-LABEL: trunc_i64_to_i16_le:
45+
; BE: // %bb.0:
46+
; BE-NEXT: fmov d0, x0
47+
; BE-NEXT: rev64 v0.8h, v0.8h
48+
; BE-NEXT: umov w0, v0.h[0]
49+
; BE-NEXT: ret
50+
;
51+
; LE-LABEL: trunc_i64_to_i16_le:
52+
; LE: // %bb.0:
53+
; LE-NEXT: fmov d0, x0
54+
; LE-NEXT: umov w0, v0.h[0]
55+
; LE-NEXT: ret
56+
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
57+
%bc = bitcast <2 x i64> %ins to <8 x i16>
58+
%ext = extractelement <8 x i16> %bc, i32 0
59+
ret i16 %ext
60+
}
61+
62+
; Insert %x into lane 0 of an undef <2 x i64>, reinterpret the vector as
; <8 x i16>, and return lane 3 (the last 16-bit lane covering the inserted
; 64-bit element).
; NOTE(review): the "_be" suffix presumably marks lane 3 as the low 16 bits of
; %x under big-endian lane numbering -- confirm against D52439.
; The autogenerated checks pin the current llc output for both run lines: each
; extracts v0.h[3] with umov, and the aarch64_be run adds a rev64.
define i16 @trunc_i64_to_i16_be(i64 %x) {
63+
; BE-LABEL: trunc_i64_to_i16_be:
64+
; BE: // %bb.0:
65+
; BE-NEXT: fmov d0, x0
66+
; BE-NEXT: rev64 v0.8h, v0.8h
67+
; BE-NEXT: umov w0, v0.h[3]
68+
; BE-NEXT: ret
69+
;
70+
; LE-LABEL: trunc_i64_to_i16_be:
71+
; LE: // %bb.0:
72+
; LE-NEXT: fmov d0, x0
73+
; LE-NEXT: umov w0, v0.h[3]
74+
; LE-NEXT: ret
75+
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
76+
%bc = bitcast <2 x i64> %ins to <8 x i16>
77+
%ext = extractelement <8 x i16> %bc, i32 3
78+
ret i16 %ext
79+
}
80+
81+
; Insert %x into lane 0 of an undef <4 x i32>, reinterpret the vector as
; <16 x i8>, and return lane 0. Per the commit title this is a truncate
; disguised as vector ops.
; NOTE(review): the "_le" suffix presumably marks lane 0 as the low byte of %x
; under little-endian lane numbering -- confirm against D52439.
; The autogenerated checks pin the current llc output for both run lines: each
; extracts v0.b[0] with umov, and the aarch64_be run adds a rev32.
define i8 @trunc_i32_to_i8_le(i32 %x) {
82+
; BE-LABEL: trunc_i32_to_i8_le:
83+
; BE: // %bb.0:
84+
; BE-NEXT: fmov s0, w0
85+
; BE-NEXT: rev32 v0.16b, v0.16b
86+
; BE-NEXT: umov w0, v0.b[0]
87+
; BE-NEXT: ret
88+
;
89+
; LE-LABEL: trunc_i32_to_i8_le:
90+
; LE: // %bb.0:
91+
; LE-NEXT: fmov s0, w0
92+
; LE-NEXT: umov w0, v0.b[0]
93+
; LE-NEXT: ret
94+
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
95+
%bc = bitcast <4 x i32> %ins to <16 x i8>
96+
%ext = extractelement <16 x i8> %bc, i32 0
97+
ret i8 %ext
98+
}
99+
100+
; Insert %x into lane 0 of an undef <4 x i32>, reinterpret the vector as
; <16 x i8>, and return lane 3 (the last byte lane covering the inserted
; 32-bit element).
; NOTE(review): the "_be" suffix presumably marks lane 3 as the low byte of %x
; under big-endian lane numbering -- confirm against D52439.
; The autogenerated checks pin the current llc output for both run lines: each
; extracts v0.b[3] with umov, and the aarch64_be run adds a rev32.
define i8 @trunc_i32_to_i8_be(i32 %x) {
101+
; BE-LABEL: trunc_i32_to_i8_be:
102+
; BE: // %bb.0:
103+
; BE-NEXT: fmov s0, w0
104+
; BE-NEXT: rev32 v0.16b, v0.16b
105+
; BE-NEXT: umov w0, v0.b[3]
106+
; BE-NEXT: ret
107+
;
108+
; LE-LABEL: trunc_i32_to_i8_be:
109+
; LE: // %bb.0:
110+
; LE-NEXT: fmov s0, w0
111+
; LE-NEXT: umov w0, v0.b[3]
112+
; LE-NEXT: ret
113+
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
114+
%bc = bitcast <4 x i32> %ins to <16 x i8>
115+
%ext = extractelement <16 x i8> %bc, i32 3
116+
ret i8 %ext
117+
}
118+

llvm/test/CodeGen/X86/extract-insert.ll

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define i8 @extractelt_bitcast(i32 %x) nounwind {
2929
}
3030

3131
; TODO: This should have folded to avoid vector ops, but the transform
32-
; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU
32+
; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU
3333
; codegen better.
3434

3535
define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
@@ -60,3 +60,56 @@ define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
6060
ret i8 %ext
6161
}
6262

63+
; Insert %x into lane 0 of an undef <2 x i64>, reinterpret the vector as
; <4 x i32>, and return lane 0. Per the commit title this is a truncate
; disguised as vector ops.
; The autogenerated checks pin the current llc output: the X86-prefixed run
; is a single scalar load from the stack, while the X64-prefixed run still
; round-trips %rdi through %xmm0 (movq then movd).
define i32 @trunc_i64_to_i32_le(i64 %x) {
64+
; X86-LABEL: trunc_i64_to_i32_le:
65+
; X86: # %bb.0:
66+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
67+
; X86-NEXT: retl
68+
;
69+
; X64-LABEL: trunc_i64_to_i32_le:
70+
; X64: # %bb.0:
71+
; X64-NEXT: movq %rdi, %xmm0
72+
; X64-NEXT: movd %xmm0, %eax
73+
; X64-NEXT: retq
74+
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
75+
%bc = bitcast <2 x i64> %ins to <4 x i32>
76+
%ext = extractelement <4 x i32> %bc, i32 0
77+
ret i32 %ext
78+
}
79+
80+
; Insert %x into lane 0 of an undef <2 x i64>, reinterpret the vector as
; <8 x i16>, and return lane 0. Per the commit title this is a truncate
; disguised as vector ops.
; The autogenerated checks pin the current llc output: the X86-prefixed run
; is a single scalar load from the stack, while the X64-prefixed run still
; round-trips %rdi through %xmm0 (movq then movd). Both return the value in
; $ax, as the "kill" annotations record.
define i16 @trunc_i64_to_i16_le(i64 %x) {
81+
; X86-LABEL: trunc_i64_to_i16_le:
82+
; X86: # %bb.0:
83+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
84+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
85+
; X86-NEXT: retl
86+
;
87+
; X64-LABEL: trunc_i64_to_i16_le:
88+
; X64: # %bb.0:
89+
; X64-NEXT: movq %rdi, %xmm0
90+
; X64-NEXT: movd %xmm0, %eax
91+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
92+
; X64-NEXT: retq
93+
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
94+
%bc = bitcast <2 x i64> %ins to <8 x i16>
95+
%ext = extractelement <8 x i16> %bc, i32 0
96+
ret i16 %ext
97+
}
98+
99+
; Insert %x into lane 0 of an undef <4 x i32>, reinterpret the vector as
; <16 x i8>, and return lane 0. Per the commit title this is a truncate
; disguised as vector ops.
; The autogenerated checks pin the current llc output: neither run uses a
; vector register here -- the X86-prefixed run is a byte load from the stack
; and the X64-prefixed run is a plain movl of %edi into %eax.
define i8 @trunc_i32_to_i8_le(i32 %x) {
100+
; X86-LABEL: trunc_i32_to_i8_le:
101+
; X86: # %bb.0:
102+
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
103+
; X86-NEXT: retl
104+
;
105+
; X64-LABEL: trunc_i32_to_i8_le:
106+
; X64: # %bb.0:
107+
; X64-NEXT: movl %edi, %eax
108+
; X64-NEXT: # kill: def $al killed $al killed $eax
109+
; X64-NEXT: retq
110+
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
111+
%bc = bitcast <4 x i32> %ins to <16 x i8>
112+
%ext = extractelement <16 x i8> %bc, i32 0
113+
ret i8 %ext
114+
}
115+

0 commit comments

Comments
 (0)