Skip to content

Commit 36abb65

Browse files
committed
more test updates
1 parent eb8e0b9 commit 36abb65

File tree

9 files changed

+74
-74
lines changed

9 files changed

+74
-74
lines changed

clang/test/CodeGenCUDA/bf16.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// CHECK: .param .align 2 .b8 _Z8test_argPDF16bDF16b_param_1[2]
1212
//
1313
__device__ void test_arg(__bf16 *out, __bf16 in) {
14-
// CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [_Z8test_argPDF16bDF16b_param_0];
14+
// CHECK-DAG: ld.param.b64 %[[A:rd[0-9]+]], [_Z8test_argPDF16bDF16b_param_0];
1515
// CHECK-DAG: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_argPDF16bDF16b_param_1];
1616
__bf16 bf16 = in;
1717
*out = bf16;

clang/test/CodeGenCUDA/fp-contract.cu

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -179,26 +179,26 @@
179179
__host__ __device__ float func(float a, float b, float c) { return a + b * c; }
180180
// COMMON-LABEL: _Z4funcfff
181181
// NV-ON: fma.rn.f32
182-
// NV-ON-NEXT: st.param.f32
182+
// NV-ON-NEXT: st.param.b32
183183
// AMD-ON: v_fmac_f32_e64
184184
// AMD-ON-NEXT: s_setpc_b64
185185

186186
// NV-OFF: mul.rn.f32
187187
// NV-OFF-NEXT: add.rn.f32
188-
// NV-OFF-NEXT: st.param.f32
188+
// NV-OFF-NEXT: st.param.b32
189189
// AMD-OFF: v_mul_f32_e64
190190
// AMD-OFF-NEXT: v_add_f32_e64
191191
// AMD-OFF-NEXT: s_setpc_b64
192192

193193
// NV-OPT-FAST: fma.rn.f32
194-
// NV-OPT-FAST-NEXT: st.param.f32
194+
// NV-OPT-FAST-NEXT: st.param.b32
195195
// NV-OPT-FASTSTD: fma.rn.f32
196-
// NV-OPT-FASTSTD-NEXT: st.param.f32
196+
// NV-OPT-FASTSTD-NEXT: st.param.b32
197197
// NV-OPT-ON: fma.rn.f32
198-
// NV-OPT-ON-NEXT: st.param.f32
198+
// NV-OPT-ON-NEXT: st.param.b32
199199
// NV-OPT-OFF: mul.rn.f32
200200
// NV-OPT-OFF-NEXT: add.rn.f32
201-
// NV-OPT-OFF-NEXT: st.param.f32
201+
// NV-OPT-OFF-NEXT: st.param.b32
202202

203203
// AMD-OPT-FAST-IR: fmul contract float
204204
// AMD-OPT-FAST-IR: fadd contract float
@@ -224,15 +224,15 @@ __host__ __device__ float func2(float a, float b, float c) {
224224
}
225225
// COMMON-LABEL: _Z5func2fff
226226
// NV-OPT-FAST: fma.rn.f32
227-
// NV-OPT-FAST-NEXT: st.param.f32
227+
// NV-OPT-FAST-NEXT: st.param.b32
228228
// NV-OPT-FASTSTD: fma.rn.f32
229-
// NV-OPT-FASTSTD-NEXT: st.param.f32
229+
// NV-OPT-FASTSTD-NEXT: st.param.b32
230230
// NV-OPT-ON: mul.rn.f32
231231
// NV-OPT-ON: add.rn.f32
232-
// NV-OPT-ON-NEXT: st.param.f32
232+
// NV-OPT-ON-NEXT: st.param.b32
233233
// NV-OPT-OFF: mul.rn.f32
234234
// NV-OPT-OFF: add.rn.f32
235-
// NV-OPT-OFF-NEXT: st.param.f32
235+
// NV-OPT-OFF-NEXT: st.param.b32
236236

237237
// AMD-OPT-FAST-IR: fmul contract float
238238
// AMD-OPT-FAST-IR: fadd contract float
@@ -267,16 +267,16 @@ __host__ __device__ float func2(float a, float b, float c) {
267267
}
268268
// COMMON-LABEL: _Z5func3fff
269269
// NV-OPT-FAST: fma.rn.f32
270-
// NV-OPT-FAST-NEXT: st.param.f32
270+
// NV-OPT-FAST-NEXT: st.param.b32
271271
// NV-OPT-FASTSTD: mul.rn.f32
272272
// NV-OPT-FASTSTD: add.rn.f32
273-
// NV-OPT-FASTSTD-NEXT: st.param.f32
273+
// NV-OPT-FASTSTD-NEXT: st.param.b32
274274
// NV-OPT-ON: mul.rn.f32
275275
// NV-OPT-ON: add.rn.f32
276-
// NV-OPT-ON-NEXT: st.param.f32
276+
// NV-OPT-ON-NEXT: st.param.b32
277277
// NV-OPT-OFF: mul.rn.f32
278278
// NV-OPT-OFF: add.rn.f32
279-
// NV-OPT-OFF-NEXT: st.param.f32
279+
// NV-OPT-OFF-NEXT: st.param.b32
280280

281281
// AMD-OPT-FAST-IR: fmul float
282282
// AMD-OPT-FAST-IR: fadd float

clang/test/CodeGenCUDA/memcpy-libcall.cu

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,15 @@
1010
// PTX-LABEL: .func _Z12copy_genericPvPKv(
1111
void __device__ copy_generic(void *dest, const void *src) {
1212
__builtin_memcpy(dest, src, 32);
13-
// PTX: ld.u8
14-
// PTX: st.u8
13+
// PTX: ld.b8
14+
// PTX: st.b8
1515
}
1616

1717
// PTX-LABEL: .entry _Z11copy_globalPvS_(
1818
void __global__ copy_global(void *dest, void * src) {
1919
__builtin_memcpy(dest, src, 32);
20-
// PTX: ld.global.u8
21-
// PTX: st.global.u8
20+
// PTX: ld.global.b8
21+
// PTX: st.global.b8
2222
}
2323

2424
struct S {
@@ -28,37 +28,37 @@ struct S {
2828
// PTX-LABEL: .entry _Z20copy_param_to_globalP1SS_(
2929
void __global__ copy_param_to_global(S *global, S param) {
3030
__builtin_memcpy(global, &param, sizeof(S));
31-
// PTX: ld.param.u32
32-
// PTX: st.global.u32
31+
// PTX: ld.param.b32
32+
// PTX: st.global.b32
3333
}
3434

3535
// PTX-LABEL: .entry _Z19copy_param_to_localPU3AS51SS_(
3636
void __global__ copy_param_to_local(__attribute__((address_space(5))) S *local,
3737
S param) {
3838
__builtin_memcpy(local, &param, sizeof(S));
39-
// PTX: ld.param.u32
40-
// PTX: st.local.u32
39+
// PTX: ld.param.b32
40+
// PTX: st.local.b32
4141
}
4242

4343
// PTX-LABEL: .func _Z21copy_local_to_genericP1SPU3AS5S_(
4444
void __device__ copy_local_to_generic(S *generic,
4545
__attribute__((address_space(5))) S *src) {
4646
__builtin_memcpy(generic, src, sizeof(S));
47-
// PTX: ld.local.u32
48-
// PTX: st.u32
47+
// PTX: ld.local.b32
48+
// PTX: st.b32
4949
}
5050

5151
__shared__ S shared;
5252

5353
// PTX-LABEL: .entry _Z20copy_param_to_shared1S(
5454
void __global__ copy_param_to_shared( S param) {
5555
__builtin_memcpy(&shared, &param, sizeof(S));
56-
// PTX: ld.param.u32
57-
// PTX: st.shared.u32
56+
// PTX: ld.param.b32
57+
// PTX: st.shared.b32
5858
}
5959

6060
void __device__ copy_shared_to_generic(S *generic) {
6161
__builtin_memcpy(generic, &shared, sizeof(S));
62-
// PTX: ld.shared.u32
63-
// PTX: st.u32
62+
// PTX: ld.shared.b32
63+
// PTX: st.b32
6464
}

llvm/test/DebugInfo/NVPTX/debug-info.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@
2424
; CHECK-DAG: .reg .b32 %r<6>;
2525
; CHECK-DAG: .reg .b64 %rd<8>;
2626
; CHECK: .loc [[DEBUG_INFO_CU:[0-9]+]] 5 0
27-
; CHECK: ld.param.u32 %r{{.+}}, [{{.+}}];
28-
; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}];
27+
; CHECK: ld.param.b32 %r{{.+}}, [{{.+}}];
28+
; CHECK: ld.param.b64 %rd{{.+}}, [{{.+}}];
2929
; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}};
30-
; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}];
30+
; CHECK: ld.param.b64 %rd{{.+}}, [{{.+}}];
3131
; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}};
3232
; CHECK: .loc [[BUILTUIN_VARS_H:[0-9]+]] 78 180
3333
; CHECK: mov.u32 %r{{.+}}, %ctaid.x;
@@ -41,18 +41,18 @@
4141
; CHECK: setp.ge.s32 %p{{.+}}, %r{{.+}}, %r{{.+}};
4242
; CHECK: .loc [[DEBUG_INFO_CU]] 7 7
4343
; CHECK: @%p{{.+}} bra [[BB:\$L__.+]];
44-
; CHECK: ld.param.f32 %f{{.+}}, [{{.+}}];
44+
; CHECK: ld.param.b32 %f{{.+}}, [{{.+}}];
4545
; CHECK: .loc [[DEBUG_INFO_CU]] 8 13
4646
; CHECK: mul.wide.u32 %rd{{.+}}, %r{{.+}}, 4;
4747
; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}};
48-
; CHECK: ld.global.f32 %f{{.+}}, [%rd{{.+}}];
48+
; CHECK: ld.global.b32 %f{{.+}}, [%rd{{.+}}];
4949
; CHECK: .loc [[DEBUG_INFO_CU]] 8 19
5050
; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}};
51-
; CHECK: ld.global.f32 %f{{.+}}, [%rd{{.+}}];
51+
; CHECK: ld.global.b32 %f{{.+}}, [%rd{{.+}}];
5252
; CHECK: .loc [[DEBUG_INFO_CU]] 3 82
5353
; CHECK: fma.rn.f32 %f{{.+}}, %f{{.+}}, %f{{.+}}, %f{{.+}};
5454
; CHECK: .loc [[DEBUG_INFO_CU]] 3 78
55-
; CHECK: st.global.f32 [%rd{{.+}}], %f{{.+}};
55+
; CHECK: st.global.b32 [%rd{{.+}}], %f{{.+}};
5656
; CHECK: [[BB]]:
5757
; CHECK: .loc [[DEBUG_INFO_CU]] 9 1
5858
; CHECK: ret;

llvm/test/Transforms/NaryReassociate/NVPTX/nary-slsr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
1717
define void @nary_reassociate_after_slsr(i32 %a, i32 %b, i32 %c) {
1818
; CHECK-LABEL: @nary_reassociate_after_slsr(
1919
; PTX-LABEL: .visible .func nary_reassociate_after_slsr(
20-
; PTX: ld.param.u32 [[b:%r[0-9]+]], [nary_reassociate_after_slsr_param_1];
20+
; PTX: ld.param.b32 [[b:%r[0-9]+]], [nary_reassociate_after_slsr_param_1];
2121
%ab = add i32 %a, %b
2222
%abc = add i32 %ab, %c
2323
call void @foo(i32 %abc)

llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,10 @@ define void @sum_of_array(i32 %x, i32 %y, ptr nocapture %output) {
7070
ret void
7171
}
7272
; PTX-LABEL: sum_of_array(
73-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
74-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
75-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
76-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
73+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
74+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
75+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
76+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
7777

7878
; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
7979
; some infrastructure changes to enable such optimizations.
@@ -134,10 +134,10 @@ define void @sum_of_array2(i32 %x, i32 %y, ptr nocapture %output) {
134134
ret void
135135
}
136136
; PTX-LABEL: sum_of_array2(
137-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
138-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
139-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
140-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
137+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
138+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
139+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
140+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
141141

142142

143143

@@ -203,10 +203,10 @@ define void @sum_of_array3(i32 %x, i32 %y, ptr nocapture %output) {
203203
ret void
204204
}
205205
; PTX-LABEL: sum_of_array3(
206-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
207-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
208-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
209-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
206+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
207+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
208+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
209+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
210210

211211

212212

@@ -268,10 +268,10 @@ define void @sum_of_array4(i32 %x, i32 %y, ptr nocapture %output) {
268268
ret void
269269
}
270270
; PTX-LABEL: sum_of_array4(
271-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
272-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
273-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
274-
; PTX-DAG: ld.shared.f32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
271+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG:%(rd|r)[0-9]+]]]
272+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+4]
273+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+128]
274+
; PTX-DAG: ld.shared.b32 {{%f[0-9]+}}, [[[BASE_REG]]+132]
275275

276276

277277

@@ -307,15 +307,15 @@ entry:
307307
%0 = sext i32 %xy to i64
308308
%p0 = getelementptr inbounds float, ptr %input, i64 %0
309309
%v0 = load float, ptr %p0, align 4
310-
; PTX: ld.f32 %f{{[0-9]+}}, [[[p0:%rd[0-9]+]]]
310+
; PTX: ld.b32 %f{{[0-9]+}}, [[[p0:%rd[0-9]+]]]
311311
call void @use(float %v0)
312312

313313
%y5 = add nsw i32 %y, 5
314314
%xy5 = add nsw i32 %x, %y5
315315
%1 = sext i32 %xy5 to i64
316316
%p1 = getelementptr inbounds float, ptr %input, i64 %1
317317
%v1 = load float, ptr %p1, align 4
318-
; PTX: ld.f32 %f{{[0-9]+}}, [[[p0]]+20]
318+
; PTX: ld.b32 %f{{[0-9]+}}, [[[p0]]+20]
319319
call void @use(float %v1)
320320

321321
ret void

llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ define void @slsr_after_reassociate_geps(ptr %arr, i32 %i) {
5151
; CHECK-NEXT: ret void
5252
;
5353
; PTX-LABEL: .visible .func slsr_after_reassociate_geps(
54-
; PTX: ld.param.u64 [[arr:%rd[0-9]+]], [slsr_after_reassociate_geps_param_0];
55-
; PTX: ld.param.u32 [[i:%r[0-9]+]], [slsr_after_reassociate_geps_param_1];
54+
; PTX: ld.param.b64 [[arr:%rd[0-9]+]], [slsr_after_reassociate_geps_param_0];
55+
; PTX: ld.param.b32 [[i:%r[0-9]+]], [slsr_after_reassociate_geps_param_1];
5656
%i2 = shl nsw i32 %i, 1
5757
%i3 = mul nsw i32 %i, 3
5858
%i4 = shl nsw i32 %i, 2
@@ -62,28 +62,28 @@ define void @slsr_after_reassociate_geps(ptr %arr, i32 %i) {
6262
; PTX: mul.wide.s32 [[i4:%rd[0-9]+]], [[i]], 4;
6363
; PTX: add.s64 [[base1:%rd[0-9]+]], [[arr]], [[i4]];
6464
%v1 = load float, ptr %p1, align 4
65-
; PTX: ld.f32 {{%f[0-9]+}}, [[[base1]]+20];
65+
; PTX: ld.b32 {{%f[0-9]+}}, [[[base1]]+20];
6666
call void @foo(float %v1)
6767

6868
%j2 = add nsw i32 %i2, 5
6969
%p2 = getelementptr inbounds float, ptr %arr, i32 %j2
7070
; PTX: add.s64 [[base2:%rd[0-9]+]], [[base1]], [[i4]];
7171
%v2 = load float, ptr %p2, align 4
72-
; PTX: ld.f32 {{%f[0-9]+}}, [[[base2]]+20];
72+
; PTX: ld.b32 {{%f[0-9]+}}, [[[base2]]+20];
7373
call void @foo(float %v2)
7474

7575
%j3 = add nsw i32 %i3, 5
7676
%p3 = getelementptr inbounds float, ptr %arr, i32 %j3
7777
; PTX: add.s64 [[base3:%rd[0-9]+]], [[base2]], [[i4]];
7878
%v3 = load float, ptr %p3, align 4
79-
; PTX: ld.f32 {{%f[0-9]+}}, [[[base3]]+20];
79+
; PTX: ld.b32 {{%f[0-9]+}}, [[[base3]]+20];
8080
call void @foo(float %v3)
8181

8282
%j4 = add nsw i32 %i4, 5
8383
%p4 = getelementptr inbounds float, ptr %arr, i32 %j4
8484
; PTX: add.s64 [[base4:%rd[0-9]+]], [[base3]], [[i4]];
8585
%v4 = load float, ptr %p4, align 4
86-
; PTX: ld.f32 {{%f[0-9]+}}, [[[base4]]+20];
86+
; PTX: ld.b32 {{%f[0-9]+}}, [[[base4]]+20];
8787
call void @foo(float %v4)
8888

8989
ret void

llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ target triple = "nvptx64-nvidia-cuda"
1414
define ptx_kernel void @foo(i32 %b, i32 %s) {
1515
; CHECK-LABEL: .visible .entry foo(
1616
entry:
17-
; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1];
18-
; CHECK: ld.param.u32 [[b:%r[0-9]+]], [foo_param_0];
17+
; CHECK: ld.param.b32 [[s:%r[0-9]+]], [foo_param_1];
18+
; CHECK: ld.param.b32 [[b:%r[0-9]+]], [foo_param_0];
1919
%call = tail call zeroext i1 @cond(i32 0)
2020
br i1 %call, label %if.then, label %for.inc
2121

llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
1010
; CHECK-NEXT: .reg .b64 %rd<13>;
1111
; CHECK-EMPTY:
1212
; CHECK-NEXT: // %bb.0:
13-
; CHECK-NEXT: ld.param.u64 %rd1, [caller_St8x4_param_0+8];
14-
; CHECK-NEXT: ld.param.u64 %rd2, [caller_St8x4_param_0];
15-
; CHECK-NEXT: ld.param.u64 %rd3, [caller_St8x4_param_0+24];
16-
; CHECK-NEXT: ld.param.u64 %rd4, [caller_St8x4_param_0+16];
13+
; CHECK-NEXT: ld.param.b64 %rd1, [caller_St8x4_param_0+8];
14+
; CHECK-NEXT: ld.param.b64 %rd2, [caller_St8x4_param_0];
15+
; CHECK-NEXT: ld.param.b64 %rd3, [caller_St8x4_param_0+24];
16+
; CHECK-NEXT: ld.param.b64 %rd4, [caller_St8x4_param_0+16];
1717
; CHECK-NEXT: { // callseq 0, 0
1818
; CHECK-NEXT: .param .align 16 .b8 param0[32];
1919
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd2, %rd1};
@@ -27,11 +27,11 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
2727
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
2828
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
2929
; CHECK-NEXT: } // callseq 0
30-
; CHECK-NEXT: ld.param.u32 %r2, [caller_St8x4_param_1];
31-
; CHECK-NEXT: st.u64 [%r2], %rd5;
32-
; CHECK-NEXT: st.u64 [%r2+8], %rd6;
33-
; CHECK-NEXT: st.u64 [%r2+16], %rd7;
34-
; CHECK-NEXT: st.u64 [%r2+24], %rd8;
30+
; CHECK-NEXT: ld.param.b32 %r2, [caller_St8x4_param_1];
31+
; CHECK-NEXT: st.b64 [%r2], %rd5;
32+
; CHECK-NEXT: st.b64 [%r2+8], %rd6;
33+
; CHECK-NEXT: st.b64 [%r2+16], %rd7;
34+
; CHECK-NEXT: st.b64 [%r2+24], %rd8;
3535
; CHECK-NEXT: ret;
3636
%call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
3737
%.fca.0.extract = extractvalue [4 x i64] %call, 0
@@ -56,8 +56,8 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by
5656
; CHECK-NEXT: .reg .b64 %rd<5>;
5757
; CHECK-EMPTY:
5858
; CHECK-NEXT: // %bb.0:
59-
; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [callee_St8x4_param_0];
60-
; CHECK-NEXT: ld.param.v2.u64 {%rd3, %rd4}, [callee_St8x4_param_0+16];
59+
; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [callee_St8x4_param_0];
60+
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [callee_St8x4_param_0+16];
6161
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
6262
; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
6363
; CHECK-NEXT: ret;

0 commit comments

Comments
 (0)