Commit 1393aeb

Pre-commit AMDGPU tests for masked load/store/scatter/gather (#104645)
I'm planning to fix the masked operation scalarizer so that it doesn't generate suboptimal code on AMD GPUs and other SIMT machines, and so am adding tests now.
1 parent e8ca306 commit 1393aeb
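
Since these are pre-commit tests, the CHECK lines below capture the pass's current, CPU-oriented output, in which the <2 x i1> mask is bitcast to an i2 and each lane's bit is re-derived with an and/icmp pair; the COM comments in the tests describe what the planned fix will assert instead. A minimal sketch of the contrast (the per-lane form is a reading of the plan above, not actual pass output, and the value names are hypothetical):

; Current output, as recorded in the CHECK lines below:
  %scalar_mask = bitcast <2 x i1> %mask to i2
  %bit0 = and i2 %scalar_mask, 1
  %cond0 = icmp ne i2 %bit0, 0
  br i1 %cond0, label %cond.load, label %else

; Planned SIMT-friendly form (sketch): branch on each mask lane directly,
; since each i1 already occupies at least one register on a GPU:
  %lane0 = extractelement <2 x i1> %mask, i64 0
  br i1 %lane0, label %cond.load, label %else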

File tree

5 files changed (+407 -0 lines changed)
Lines changed: 170 additions & 0 deletions
@@ -0,0 +1,170 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=amdgcn-amd-amdhsa | FileCheck %s

; COM: Test that, unlike on CPU targets, the mask doesn't get bitcast to a scalar,
; COM: since, on GPUs, each i1 takes up at least one register and so they should
; COM: be treated separately.

define <2 x i32> @scalarize_v2i32(ptr %p, <2 x i1> %mask, <2 x i32> %passthru) {
; CHECK-LABEL: define <2 x i32> @scalarize_v2i32(
; CHECK-SAME: ptr [[P:%.*]], <2 x i1> [[MASK:%.*]], <2 x i32> [[PASSTHRU:%.*]]) {
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK]] to i2
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[COND_LOAD:.*]], label %[[ELSE:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[PASSTHRU]], i32 [[TMP4]], i64 0
; CHECK-NEXT: br label %[[ELSE]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i32> [ [[TMP5]], %[[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[COND_LOAD1:.*]], label %[[ELSE2:.*]]
; CHECK: [[COND_LOAD1]]:
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[P]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[RES_PHI_ELSE]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label %[[ELSE2]]
; CHECK: [[ELSE2]]:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i32> [ [[TMP10]], %[[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], %[[ELSE]] ]
; CHECK-NEXT: ret <2 x i32> [[RES_PHI_ELSE3]]
;
  %ret = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %p, i32 128, <2 x i1> %mask, <2 x i32> %passthru)
  ret <2 x i32> %ret
}

define <2 x i32> @scalarize_v2i32_splat_mask(ptr %p, i1 %mask, <2 x i32> %passthrough) {
; CHECK-LABEL: define <2 x i32> @scalarize_v2i32_splat_mask(
; CHECK-SAME: ptr [[P:%.*]], i1 [[MASK:%.*]], <2 x i32> [[PASSTHROUGH:%.*]]) {
; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK]], i32 0
; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK_SPLAT_FIRST:%.*]] = extractelement <2 x i1> [[MASK_SPLAT]], i64 0
; CHECK-NEXT: br i1 [[MASK_SPLAT_FIRST]], label %[[COND_LOAD:.*]], label %[[BB1:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[RET_COND_LOAD:%.*]] = load <2 x i32>, ptr [[P]], align 8
; CHECK-NEXT: br label %[[BB1]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[RET:%.*]] = phi <2 x i32> [ [[RET_COND_LOAD]], %[[COND_LOAD]] ], [ [[PASSTHROUGH]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
  %mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
  %mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
  %ret = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %p, i32 8, <2 x i1> %mask.splat, <2 x i32> %passthrough)
  ret <2 x i32> %ret
}

define <2 x half> @scalarize_v2f16(ptr %p, <2 x i1> %mask, <2 x half> %passthru) {
; CHECK-LABEL: define <2 x half> @scalarize_v2f16(
; CHECK-SAME: ptr [[P:%.*]], <2 x i1> [[MASK:%.*]], <2 x half> [[PASSTHRU:%.*]]) {
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK]] to i2
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[COND_LOAD:.*]], label %[[ELSE:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[P]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load half, ptr [[TMP3]], align 2
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[PASSTHRU]], half [[TMP4]], i64 0
; CHECK-NEXT: br label %[[ELSE]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x half> [ [[TMP5]], %[[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[COND_LOAD1:.*]], label %[[ELSE2:.*]]
; CHECK: [[COND_LOAD1]]:
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr [[P]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x half> [[RES_PHI_ELSE]], half [[TMP9]], i64 1
; CHECK-NEXT: br label %[[ELSE2]]
; CHECK: [[ELSE2]]:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x half> [ [[TMP10]], %[[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], %[[ELSE]] ]
; CHECK-NEXT: ret <2 x half> [[RES_PHI_ELSE3]]
;
  %ret = call <2 x half> @llvm.masked.load.v2f16.p0(ptr %p, i32 128, <2 x i1> %mask, <2 x half> %passthru)
  ret <2 x half> %ret
}

define <2 x i32> @scalarize_v2i32_p3(ptr addrspace(3) %p, <2 x i1> %mask, <2 x i32> %passthru) {
; CHECK-LABEL: define <2 x i32> @scalarize_v2i32_p3(
; CHECK-SAME: ptr addrspace(3) [[P:%.*]], <2 x i1> [[MASK:%.*]], <2 x i32> [[PASSTHRU:%.*]]) {
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK]] to i2
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[COND_LOAD:.*]], label %[[ELSE:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[PASSTHRU]], i32 [[TMP4]], i64 0
; CHECK-NEXT: br label %[[ELSE]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i32> [ [[TMP5]], %[[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[COND_LOAD1:.*]], label %[[ELSE2:.*]]
; CHECK: [[COND_LOAD1]]:
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[P]], i32 1
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(3) [[TMP8]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[RES_PHI_ELSE]], i32 [[TMP9]], i64 1
; CHECK-NEXT: br label %[[ELSE2]]
; CHECK: [[ELSE2]]:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i32> [ [[TMP10]], %[[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], %[[ELSE]] ]
; CHECK-NEXT: ret <2 x i32> [[RES_PHI_ELSE3]]
;
  %ret = call <2 x i32> @llvm.masked.load.v2i32.p3(ptr addrspace(3) %p, i32 128, <2 x i1> %mask, <2 x i32> %passthru)
  ret <2 x i32> %ret
}

define <2 x i32> @scalarize_v2i32_lane_mask(ptr %p, <2 x i32> %passthrough) {
; CHECK-LABEL: define <2 x i32> @scalarize_v2i32_lane_mask(
; CHECK-SAME: ptr [[P:%.*]], <2 x i32> [[PASSTHROUGH:%.*]]) {
; CHECK-NEXT: [[ITEM_ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[MASK:%.*]] = icmp ult i32 [[ITEM_ID]], 16
; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK]], i32 0
; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK_SPLAT_FIRST:%.*]] = extractelement <2 x i1> [[MASK_SPLAT]], i64 0
; CHECK-NEXT: br i1 [[MASK_SPLAT_FIRST]], label %[[COND_LOAD:.*]], label %[[BB1:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[RET_COND_LOAD:%.*]] = load <2 x i32>, ptr [[P]], align 8
; CHECK-NEXT: br label %[[BB1]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[RET:%.*]] = phi <2 x i32> [ [[RET_COND_LOAD]], %[[COND_LOAD]] ], [ [[PASSTHROUGH]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
  %item.id = call i32 @llvm.amdgcn.workitem.id.x()
  %mask = icmp ult i32 %item.id, 16
  %mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
  %mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
  %ret = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %p, i32 8, <2 x i1> %mask.splat, <2 x i32> %passthrough)
  ret <2 x i32> %ret
}

define <2 x i32> @scalarize_v2i32_group_mask(ptr %p, <2 x i32> %passthrough) {
; CHECK-LABEL: define <2 x i32> @scalarize_v2i32_group_mask(
; CHECK-SAME: ptr [[P:%.*]], <2 x i32> [[PASSTHROUGH:%.*]]) {
; CHECK-NEXT: [[GROUP_ID:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
; CHECK-NEXT: [[MASK:%.*]] = icmp ult i32 [[GROUP_ID]], 4
; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK]], i32 0
; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[MASK_SPLAT_FIRST:%.*]] = extractelement <2 x i1> [[MASK_SPLAT]], i64 0
; CHECK-NEXT: br i1 [[MASK_SPLAT_FIRST]], label %[[COND_LOAD:.*]], label %[[BB1:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[RET_COND_LOAD:%.*]] = load <2 x i32>, ptr [[P]], align 8
; CHECK-NEXT: br label %[[BB1]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[RET:%.*]] = phi <2 x i32> [ [[RET_COND_LOAD]], %[[COND_LOAD]] ], [ [[PASSTHROUGH]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
  %group.id = call i32 @llvm.amdgcn.workgroup.id.x()
  %mask = icmp ult i32 %group.id, 4
  %mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
  %mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
  %ret = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %p, i32 8, <2 x i1> %mask.splat, <2 x i32> %passthrough)
  ret <2 x i32> %ret
}

declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
declare <2 x half> @llvm.masked.load.v2f16.p0(ptr, i32, <2 x i1>, <2 x half>)
declare <2 x i32> @llvm.masked.load.v2i32.p3(ptr addrspace(3), i32, <2 x i1>, <2 x i32>)
declare noundef i32 @llvm.amdgcn.workitem.id.x()
declare noundef i32 @llvm.amdgcn.workgroup.id.x()
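
For reference, the LangRef defines @llvm.masked.load so that, when the whole access is known to be safe, it is equivalent to a full-width load followed by a per-lane select; a sketch of that equivalence for the v2i32 case (valid only when the full load cannot fault, which is exactly what the scalarizer's branches avoid assuming):

  %loaded = load <2 x i32>, ptr %p, align 4
  %ret = select <2 x i1> %mask, <2 x i32> %loaded, <2 x i32> %passthru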
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=amdgcn-amd-amdhsa | FileCheck %s

; COM: Test that, unlike on CPU targets, the mask doesn't get bitcast to a scalar,
; COM: since, on GPUs, each i1 takes up at least one register and so they should
; COM: be treated separately.

define <2 x i32> @scalarize_v2i32(<2 x ptr> %p, <2 x i1> %mask, <2 x i32> %passthru) {
; CHECK-LABEL: define <2 x i32> @scalarize_v2i32(
; CHECK-SAME: <2 x ptr> [[P:%.*]], <2 x i1> [[MASK:%.*]], <2 x i32> [[PASSTHRU:%.*]]) {
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK]] to i2
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[COND_LOAD:.*]], label %[[ELSE:.*]]
; CHECK: [[COND_LOAD]]:
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x ptr> [[P]], i64 0
; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr [[PTR0]], align 8
; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i32> [[PASSTHRU]], i32 [[LOAD0]], i64 0
; CHECK-NEXT: br label %[[ELSE]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i32> [ [[RES0]], %[[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP3:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i2 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[COND_LOAD1:.*]], label %[[ELSE2:.*]]
; CHECK: [[COND_LOAD1]]:
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x ptr> [[P]], i64 1
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[PTR1]], align 8
; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i32> [[RES_PHI_ELSE]], i32 [[LOAD1]], i64 1
; CHECK-NEXT: br label %[[ELSE2]]
; CHECK: [[ELSE2]]:
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i32> [ [[RES1]], %[[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], %[[ELSE]] ]
; CHECK-NEXT: ret <2 x i32> [[RES_PHI_ELSE3]]
;
  %ret = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %p, i32 8, <2 x i1> %mask, <2 x i32> %passthru)
  ret <2 x i32> %ret
}

declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
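
Unlike the masked loads above, a gather carries a vector of pointers, so each active lane loads through its own address. If both mask lanes were known to be set, the call above would reduce to this unconditional form (a sketch; value names are hypothetical):

  %p0 = extractelement <2 x ptr> %p, i64 0
  %v0 = load i32, ptr %p0, align 8
  %p1 = extractelement <2 x ptr> %p, i64 1
  %v1 = load i32, ptr %p1, align 8
  %tmp = insertelement <2 x i32> poison, i32 %v0, i64 0
  %ret = insertelement <2 x i32> %tmp, i32 %v1, i64 1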
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=amdgcn-amd-amdhsa | FileCheck %s

; COM: Test that, unlike on CPU targets, the mask doesn't get bitcast to a scalar,
; COM: since, on GPUs, each i1 takes up at least one register and so they should
; COM: be treated separately.

define void @scalarize_v2i32(<2 x ptr> %p, <2 x i1> %mask, <2 x i32> %value) {
; CHECK-LABEL: define void @scalarize_v2i32(
; CHECK-SAME: <2 x ptr> [[P:%.*]], <2 x i1> [[MASK:%.*]], <2 x i32> [[VALUE:%.*]]) {
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK]] to i2
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[COND_STORE:.*]], label %[[ELSE:.*]]
; CHECK: [[COND_STORE]]:
; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x i32> [[VALUE]], i64 0
; CHECK-NEXT: [[PTR0:%.*]] = extractelement <2 x ptr> [[P]], i64 0
; CHECK-NEXT: store i32 [[ELT0]], ptr [[PTR0]], align 8
; CHECK-NEXT: br label %[[ELSE]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[TMP3:%.*]] = and i2 [[SCALAR_MASK]], -2
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i2 [[TMP3]], 0
; CHECK-NEXT: br i1 [[TMP4]], label %[[COND_STORE1:.*]], label %[[ELSE2:.*]]
; CHECK: [[COND_STORE1]]:
; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x i32> [[VALUE]], i64 1
; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x ptr> [[P]], i64 1
; CHECK-NEXT: store i32 [[ELT1]], ptr [[PTR1]], align 8
; CHECK-NEXT: br label %[[ELSE2]]
; CHECK: [[ELSE2]]:
; CHECK-NEXT: ret void
;
  call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %value, <2 x ptr> %p, i32 8, <2 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
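
Once the scalarizer fix lands, the autogenerated assertions in these files can be refreshed by re-running utils/update_test_checks.py, which picks the UTC_ARGS (here, --version 5) back up from each file's NOTE line. A typical invocation, with the opt binary and test paths as illustrative assumptions rather than values taken from this commit:

  python3 llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/Transforms/ScalarizeMaskedMemIntrin/AMDGPU/*.ll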
