@@ -70,15 +70,15 @@ int main() {
70
70
matrix_layout::row_major>
71
71
sub_c;
72
72
73
- // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1f32(float addrspace(1)* %_arg_ , i32 16) #{{.*}}
73
+ // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32.p1f32(float addrspace(1)* %_arg_accC , i32 16) #{{.*}}
74
74
joint_matrix_load (sg, sub_c, accC.get_pointer (), N);
75
75
// CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0i32(i32* %call.ascast.i.i{{.*}}.i, i32 8) #{{.*}}
76
76
joint_matrix_load (sg, sub_a, accA.get_pointer (), K);
77
77
// CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.b.row.stride.tf32.p0i32(i32* %call.ascast.i.i{{.*}}.i, i32 16) #{{.*}}
78
78
joint_matrix_load (sg, sub_b, accB.get_pointer (), N);
79
- // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %10 , i32 %11 , i32 %12 , i32 %13 , i32 %15 , i32 %16 , i32 %17 , i32 %18 , float %1 , float %2 , float %3 , float %4 , float %5 , float %6 , float %7 , float %8 ) #{{.*}}
79
+ // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} ) #{{.*}}
80
80
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
81
- // CHECK: tail call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32.p1f32(float addrspace(1)* %_arg_14 , float %20 , float %21 , float %22 , float %23 , float %24 , float %25 , float %26 , float %27 , i32 16) # {{.*}}
81
+ // CHECK: tail call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32.p1f32(float addrspace(1)* {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , i32 {{.*}} ) #{{.*}}
82
82
joint_matrix_store (sg, sub_c, accD.get_pointer (), N);
83
83
});
84
84
});
@@ -106,15 +106,15 @@ int main() {
106
106
matrix_layout::col_major>
107
107
sub_c;
108
108
109
- // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1f32(float addrspace(1)* %_arg_ , i32 16 ) #{{.*}}
109
+ // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32.p1f32(float addrspace(1)* {{.*}} , i32 {{.*}} ) #{{.*}}
110
110
joint_matrix_load (sg, sub_c, accC.get_pointer (), N);
111
111
// CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.col.stride.tf32.p0i32(i32* %call.ascast.i.i{{.*}}.i, i32 8) #{{.*}}
112
112
joint_matrix_load (sg, sub_a, accA.get_pointer (), K);
113
113
// CHECK: tail call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.b.col.stride.tf32.p0i32(i32* %call.ascast.i.i{{.*}}.i, i32 16) #{{.*}}
114
114
joint_matrix_load (sg, sub_b, accB.get_pointer (), N);
115
- // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.col.col.tf32(i32 %10 , i32 %11 , i32 %12 , i32 %13 , i32 %15 , i32 %16 , i32 %17 , i32 %18 , float %1 , float %2 , float %3 , float %4 , float %5 , float %6 , float %7 , float %8 ) #{{.*}}
115
+ // CHECK: tail call { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.col.col.tf32(i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , i32 {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} ) #{{.*}}
116
116
sub_c = joint_matrix_mad (sg, sub_a, sub_b, sub_c);
117
- // CHECK: tail call void @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32.p1f32(float addrspace(1)* %_arg_14 , float %20 , float %21 , float %22 , float %23 , float %24 , float %25 , float %26 , float %27 , i32 16) #{{.*}}
117
+ // CHECK: tail call void @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32.p1f32(float addrspace(1)* {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , float {{.*}} , i32 16) #{{.*}}
118
118
joint_matrix_store (sg, sub_c, accD.get_pointer (), N);
119
119
});
120
120
});
0 commit comments