Skip to content

Commit b5d1320

Browse files
authored
[NFC][NVPTX] Add a simpler test case for 0b80288 (#73379)
Commit 0b80288 enabled more efficient lowering of <16 x i8> loads, but its test case was closer to an "integration" test. Add a much simpler unit test case that exercises that lowering.
1 parent 1116e4f commit b5d1320

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed
Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,39 @@
11
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
22
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
33

4-
; CHECK: .visible .func foo1
4+
; CHECK-LABEL: .visible .func foo1
55
; CHECK: st.v2.f32
66
define void @foo1(<2 x float> %val, ptr %ptr) { ; takes a two-element float vector and a destination pointer
77
store <2 x float> %val, ptr %ptr ; single IR store of the whole vector; the test looks for st.v2.f32 in the llc output
88
ret void
99
}
1010

11-
; CHECK: .visible .func foo2
11+
; CHECK-LABEL: .visible .func foo2
1212
; CHECK: st.v4.f32
1313
define void @foo2(<4 x float> %val, ptr %ptr) { ; takes a four-element float vector and a destination pointer
1414
store <4 x float> %val, ptr %ptr ; single IR store of the whole vector; the test looks for st.v4.f32 in the llc output
1515
ret void
1616
}
1717

18-
; CHECK: .visible .func foo3
18+
; CHECK-LABEL: .visible .func foo3
1919
; CHECK: st.v2.u32
2020
define void @foo3(<2 x i32> %val, ptr %ptr) { ; takes a two-element i32 vector and a destination pointer
2121
store <2 x i32> %val, ptr %ptr ; single IR store of the whole vector; the test looks for st.v2.u32 in the llc output
2222
ret void
2323
}
2424

25-
; CHECK: .visible .func foo4
25+
; CHECK-LABEL: .visible .func foo4
2626
; CHECK: st.v4.u32
2727
define void @foo4(<4 x i32> %val, ptr %ptr) { ; takes a four-element i32 vector and a destination pointer
2828
store <4 x i32> %val, ptr %ptr ; single IR store of the whole vector; the test looks for st.v4.u32 in the llc output
2929
ret void
3030
}
3131

32+
; CHECK-LABEL: .visible .func v16i8
33+
define void @v16i8(ptr %a, ptr %b) { ; copies one <16 x i8> vector from %a to %b
34+
; CHECK: ld.v4.u32
35+
; CHECK: st.v4.u32
36+
%v = load <16 x i8>, ptr %a ; 16 i8 lanes read in one IR load; the test expects this to appear as a 4 x 32-bit vector load
37+
store <16 x i8> %v, ptr %b ; the loaded value is written back unchanged; expected to appear as a 4 x 32-bit vector store
38+
ret void
39+
}

0 commit comments

Comments
 (0)