Skip to content

Commit d3097b7

Browse files
committed
[LSV] Precommit tests
This commit adds precommit tests for an upcoming change that introduces bitcasts to increase vectorization of loads and stores. NFC.
1 parent 091d35a commit d3097b7

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - < %s | FileCheck %s
3+
4+
; Four loads of different 4-byte types (i32, <2 x i16>, float, <4 x i8>) from
; consecutive 4-byte slots of %ptr: each GEP steps over a 4-byte element type
; with indices 0..3, giving byte offsets 0, 4, 8 and 12.
; The autogenerated assertions below record the current output, in which all
; four loads are left as separate scalar/vector loads (nothing is merged).
define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr) {
5+
; CHECK-LABEL: define void @merge_i32_2i16_float_4i8(
6+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
7+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR]], i64 0
8+
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4
9+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(1) [[PTR]], i64 1
10+
; CHECK-NEXT: [[LOAD2:%.*]] = load <2 x i16>, ptr addrspace(1) [[GEP2]], align 4
11+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR]], i64 2
12+
; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr addrspace(1) [[GEP3]], align 4
13+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(1) [[PTR]], i64 3
14+
; CHECK-NEXT: [[LOAD4:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP4]], align 4
15+
; CHECK-NEXT: ret void
16+
;
17+
  %gep1 = getelementptr inbounds i32, ptr addrspace(1) %ptr, i64 0 ; byte offset 0
18+
  %load1 = load i32, ptr addrspace(1) %gep1, align 4
19+
  %gep2 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %ptr, i64 1 ; byte offset 4
20+
  %load2 = load <2 x i16>, ptr addrspace(1) %gep2, align 4
21+
  %gep3 = getelementptr inbounds float, ptr addrspace(1) %ptr, i64 2 ; byte offset 8
22+
  %load3 = load float, ptr addrspace(1) %gep3, align 4
23+
  %gep4 = getelementptr inbounds <4 x i8>, ptr addrspace(1) %ptr, i64 3 ; byte offset 12
24+
  %load4 = load <4 x i8>, ptr addrspace(1) %gep4, align 4
25+
  ret void
26+
}
27+
28+
; Negative test: an i32 load at byte offset 0 immediately followed by an i16
; load at byte offset 4. The assertions expect both loads to remain scalar.
;
; Both GEPs step over i32 (4 bytes) so that the i16 load begins exactly where
; the i32 load ends. Stepping over `ptr` instead would place the second load
; at byte offset 8 (64-bit pointers), leaving a 4-byte gap; the loads would
; then trivially stay separate for lack of adjacency and the test would not
; exercise the mixed-width case at all.
; NOTE(review): the assertion lines were updated by hand to match the new GEP
; element type; re-run utils/update_test_checks.py to confirm.
define void @no_merge_i32_i16(ptr addrspace(1) %ptr) {
29+
; CHECK-LABEL: define void @no_merge_i32_i16(
30+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
31+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR]], i64 0
32+
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4
33+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR]], i64 1
34+
; CHECK-NEXT: [[LOAD2:%.*]] = load i16, ptr addrspace(1) [[GEP2]], align 4
35+
; CHECK-NEXT: ret void
36+
;
37+
  %gep1 = getelementptr inbounds i32, ptr addrspace(1) %ptr, i64 0 ; byte offset 0
38+
  %load1 = load i32, ptr addrspace(1) %gep1, align 4
39+
  %gep2 = getelementptr inbounds i32, ptr addrspace(1) %ptr, i64 1 ; byte offset 4, right after the i32
40+
  %load2 = load i16, ptr addrspace(1) %gep2, align 4
41+
  ret void
42+
}
43+
44+
; Loads an i64, a double and a ptr from consecutive 8-byte slots of %ptr
; (GEP indices 0, 1, 2) and stores each value back to the slot it came from.
; Per the autogenerated assertions below, the current output is:
;   - the three loads become a single <3 x i64> load; lane 1 is bitcast to
;     double and lane 2 is converted with inttoptr to ptr;
;   - the i64 and double stores become a single <2 x i64> store (the double is
;     bitcast back to i64 first), while the ptr store stays scalar.
; %ptr2 is declared but not referenced anywhere in the body.
define void @merge_i64_double_ptr(ptr addrspace(1) %ptr, ptr addrspace(2) %ptr2) {
45+
; CHECK-LABEL: define void @merge_i64_double_ptr(
46+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
47+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[PTR]], i64 0
48+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[PTR]], i64 2
49+
; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i64>, ptr addrspace(1) [[GEP1]], align 4
50+
; CHECK-NEXT: [[LOAD11:%.*]] = extractelement <3 x i64> [[TMP1]], i32 0
51+
; CHECK-NEXT: [[LOAD22:%.*]] = extractelement <3 x i64> [[TMP1]], i32 1
52+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[LOAD22]] to double
53+
; CHECK-NEXT: [[LOAD33:%.*]] = extractelement <3 x i64> [[TMP1]], i32 2
54+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[LOAD33]] to ptr
55+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[LOAD11]], i32 0
56+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP2]] to i64
57+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP5]], i32 1
58+
; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr addrspace(1) [[GEP1]], align 4
59+
; CHECK-NEXT: store ptr [[TMP3]], ptr addrspace(1) [[GEP3]], align 4
60+
; CHECK-NEXT: ret void
61+
;
62+
  %gep1 = getelementptr inbounds i64, ptr addrspace(1) %ptr, i64 0 ; slot 0
63+
  %gep2 = getelementptr inbounds double, ptr addrspace(1) %ptr, i64 1 ; slot 1
64+
  %gep3 = getelementptr inbounds ptr, ptr addrspace(1) %ptr, i64 2 ; slot 2
65+
  %load1 = load i64, ptr addrspace(1) %gep1, align 4
66+
  %load2 = load double, ptr addrspace(1) %gep2, align 4
67+
  %load3 = load ptr, ptr addrspace(1) %gep3, align 4
68+
  store i64 %load1, ptr addrspace(1) %gep1, align 4
69+
  store double %load2, ptr addrspace(1) %gep2, align 4
70+
  store ptr %load3, ptr addrspace(1) %gep3, align 4
71+
  ret void
72+
}
73+
74+
; Loads an i16 from element 0 and a half from element 1 of %ptr. Both GEP
; element types are 2 bytes wide, so the two loads are adjacent (byte offsets
; 0 and 2).
; Per the autogenerated assertions below, the current output is a single
; <2 x i16> load whose second lane is bitcast to half.
define void @merge_i16_half(ptr addrspace(1) %ptr) {
75+
; CHECK-LABEL: define void @merge_i16_half(
76+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
77+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[PTR]], i64 0
78+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(1) [[GEP1]], align 4
79+
; CHECK-NEXT: [[LOAD11:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
80+
; CHECK-NEXT: [[LOAD22:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
81+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[LOAD22]] to half
82+
; CHECK-NEXT: ret void
83+
;
84+
  %gep1 = getelementptr inbounds i16, ptr addrspace(1) %ptr, i64 0 ; byte offset 0
85+
  %load1 = load i16, ptr addrspace(1) %gep1, align 4
86+
  %gep2 = getelementptr inbounds half, ptr addrspace(1) %ptr, i64 1 ; byte offset 2
87+
  %load2 = load half, ptr addrspace(1) %gep2, align 4
88+
  ret void
89+
}

0 commit comments

Comments
 (0)