19
19
// CHECK-SCF-IF-DAG: #[[$TIMES2:.*]] = affine_map<()[s0] -> (s0 * 2)>
20
20
// CHECK-SCF-IF-DAG: #[[$TIMES4:.*]] = affine_map<()[s0] -> (s0 * 4)>
21
21
// CHECK-SCF-IF-DAG: #[[$TIMES8:.*]] = affine_map<()[s0] -> (s0 * 8)>
22
- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_32xf32 : memref<32xf32, #gpu.address_space<workgroup>>
23
- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_64xf32 : memref<64xf32, #gpu.address_space<workgroup>>
24
- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_128xf32 : memref<128xf32, #gpu.address_space<workgroup>>
25
- // CHECK-SCF-IF-DAG: memref.global "private" @__shared_256xf32 : memref<256xf32, #gpu.address_space<workgroup>>
22
+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_32xf32 : memref<32xf32, 3>
23
+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_64xf32 : memref<64xf32, 3>
24
+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_128xf32 : memref<128xf32, 3>
25
+ // CHECK-SCF-IF-DAG: memref.global "private" @__shared_256xf32 : memref<256xf32, 3>
26
26
27
27
// CHECK-SCF-IF-LABEL: func @rewrite_warp_op_to_scf_if(
28
28
// CHECK-SCF-IF-SAME: %[[laneid:.*]]: index,
@@ -47,8 +47,8 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index,
47
47
%r:2 = gpu.warp_execute_on_lane_0(%laneid)[32]
48
48
args(%v0, %v1 : vector<4xf32>, vector<8xf32>) -> (vector<1xf32>, vector<2xf32>) {
49
49
^bb0(%arg0: vector<128xf32>, %arg1: vector<256xf32>):
50
- // CHECK-SCF-IF: %[[arg1:.*]] = vector.transfer_read %[[buffer_v1]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<256xf32, #gpu.address_space<workgroup>>, vector<256xf32>
51
- // CHECK-SCF-IF: %[[arg0:.*]] = vector.transfer_read %[[buffer_v0]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<128xf32, #gpu.address_space<workgroup>>, vector<128xf32>
50
+ // CHECK-SCF-IF: %[[arg1:.*]] = vector.transfer_read %[[buffer_v1]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<256xf32, 3>, vector<256xf32>
51
+ // CHECK-SCF-IF: %[[arg0:.*]] = vector.transfer_read %[[buffer_v0]][%[[c0]]], %{{.*}} {in_bounds = [true]} : memref<128xf32, 3>, vector<128xf32>
52
52
// CHECK-SCF-IF: %[[def_0:.*]] = "some_def"(%[[arg0]]) : (vector<128xf32>) -> vector<32xf32>
53
53
// CHECK-SCF-IF: %[[def_1:.*]] = "some_def"(%[[arg1]]) : (vector<256xf32>) -> vector<64xf32>
54
54
%2 = "some_def"(%arg0) : (vector<128xf32>) -> vector<32xf32>
@@ -60,8 +60,8 @@ func.func @rewrite_warp_op_to_scf_if(%laneid: index,
60
60
// CHECK-SCF-IF: }
61
61
// CHECK-SCF-IF: gpu.barrier
62
62
// CHECK-SCF-IF: %[[o1:.*]] = affine.apply #[[$TIMES2]]()[%[[laneid]]]
63
- // CHECK-SCF-IF: %[[r1:.*]] = vector.transfer_read %[[buffer_def_1]][%[[o1]]], %{{.*}} {in_bounds = [true]} : memref<64xf32, #gpu.address_space<workgroup>>, vector<2xf32>
64
- // CHECK-SCF-IF: %[[r0:.*]] = vector.transfer_read %[[buffer_def_0]][%[[laneid]]], %{{.*}} {in_bounds = [true]} : memref<32xf32, #gpu.address_space<workgroup>>, vector<1xf32>
63
+ // CHECK-SCF-IF: %[[r1:.*]] = vector.transfer_read %[[buffer_def_1]][%[[o1]]], %{{.*}} {in_bounds = [true]} : memref<64xf32, 3>, vector<2xf32>
64
+ // CHECK-SCF-IF: %[[r0:.*]] = vector.transfer_read %[[buffer_def_0]][%[[laneid]]], %{{.*}} {in_bounds = [true]} : memref<32xf32, 3>, vector<1xf32>
65
65
// CHECK-SCF-IF: "some_use"(%[[r0]]) : (vector<1xf32>) -> ()
66
66
// CHECK-SCF-IF: "some_use"(%[[r1]]) : (vector<2xf32>) -> ()
67
67
"some_use"(%r#0) : (vector<1xf32>) -> ()
@@ -1065,18 +1065,18 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v
1065
1065
args(%s0, %v0, %v1, %v2 : f32, vector<f32>, vector<1xf32>, vector<1x1xf32>) -> (f32, vector<f32>, vector<1xf32>, vector<1x1xf32>) {
1066
1066
^bb0(%bs0: f32, %bv0: vector<f32>, %bv1: vector<1xf32>, %bv2: vector<1x1xf32>):
1067
1067
1068
- // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, #gpu.address_space<workgroup>>, vector<1x1xf32>
1069
- // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
1070
- // CHECK-SCF-IF: vector.transfer_read {{.*}}[]{{.*}} : memref<f32, #gpu.address_space<workgroup>>, vector<f32>
1071
- // CHECK-SCF-IF: memref.load {{.*}}[%[[C0]]] : memref<1xf32, #gpu.address_space<workgroup>>
1068
+ // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, 3>, vector<1x1xf32>
1069
+ // CHECK-SCF-IF: vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, 3>, vector<1xf32>
1070
+ // CHECK-SCF-IF: vector.transfer_read {{.*}}[]{{.*}} : memref<f32, 3>, vector<f32>
1071
+ // CHECK-SCF-IF: memref.load {{.*}}[%[[C0]]] : memref<1xf32, 3>
1072
1072
// CHECK-SCF-IF: "some_def_0"(%{{.*}}) : (f32) -> f32
1073
1073
// CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<f32>) -> vector<f32>
1074
1074
// CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<1xf32>) -> vector<1xf32>
1075
1075
// CHECK-SCF-IF: "some_def_1"(%{{.*}}) : (vector<1x1xf32>) -> vector<1x1xf32>
1076
- // CHECK-SCF-IF: memref.store {{.*}}[%[[C0]]] : memref<1xf32, #gpu.address_space<workgroup>>
1077
- // CHECK-SCF-IF: vector.transfer_write {{.*}}[] : vector<f32>, memref<f32, #gpu.address_space<workgroup>>
1078
- // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]]] {in_bounds = [true]} : vector<1xf32>, memref<1xf32, #gpu.address_space<workgroup>>
1079
- // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x1xf32>, memref<1x1xf32, #gpu.address_space<workgroup>>
1076
+ // CHECK-SCF-IF: memref.store {{.*}}[%[[C0]]] : memref<1xf32, 3>
1077
+ // CHECK-SCF-IF: vector.transfer_write {{.*}}[] : vector<f32>, memref<f32, 3>
1078
+ // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]]] {in_bounds = [true]} : vector<1xf32>, memref<1xf32, 3>
1079
+ // CHECK-SCF-IF: vector.transfer_write {{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<1x1xf32>, memref<1x1xf32, 3>
1080
1080
1081
1081
%rs0 = "some_def_0"(%bs0) : (f32) -> f32
1082
1082
%rv0 = "some_def_1"(%bv0) : (vector<f32>) -> vector<f32>
@@ -1088,10 +1088,10 @@ func.func @warp_execute_has_broadcast_semantics(%laneid: index, %s0: f32, %v0: v
1088
1088
}
1089
1089
1090
1090
// CHECK-SCF-IF: gpu.barrier
1091
- // CHECK-SCF-IF: %[[RV2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, #gpu.address_space<workgroup>>, vector<1x1xf32>
1092
- // CHECK-SCF-IF: %[[RV1:.*]] = vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
1093
- // CHECK-SCF-IF: %[[RV0:.*]] = vector.transfer_read {{.*}}[]{{.*}} : memref<f32, #gpu.address_space<workgroup>>, vector<f32>
1094
- // CHECK-SCF-IF: %[[RS0:.*]] = memref.load {{.*}}[%[[C0]]] : memref<1xf32, #gpu.address_space<workgroup>>
1091
+ // CHECK-SCF-IF: %[[RV2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]]{{.*}} {in_bounds = [true, true]} : memref<1x1xf32, 3>, vector<1x1xf32>
1092
+ // CHECK-SCF-IF: %[[RV1:.*]] = vector.transfer_read {{.*}}[%[[C0]]]{{.*}} {in_bounds = [true]} : memref<1xf32, 3>, vector<1xf32>
1093
+ // CHECK-SCF-IF: %[[RV0:.*]] = vector.transfer_read {{.*}}[]{{.*}} : memref<f32, 3>, vector<f32>
1094
+ // CHECK-SCF-IF: %[[RS0:.*]] = memref.load {{.*}}[%[[C0]]] : memref<1xf32, 3>
1095
1095
// CHECK-SCF-IF: return %[[RS0]], %[[RV0]], %[[RV1]], %[[RV2]] : f32, vector<f32>, vector<1xf32>, vector<1x1xf32>
1096
1096
return %r#0, %r#1, %r#2, %r#3 : f32, vector<f32>, vector<1xf32>, vector<1x1xf32>
1097
1097
}
@@ -1106,22 +1106,22 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, %
1106
1106
-> (vector<1x64x1xf32>, vector<1x2x128xf32>) {
1107
1107
// CHECK-SCF-IF-DAG: %[[C0:.*]] = arith.constant 0 : index
1108
1108
1109
- // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[LANEID]], %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x1xf32>, memref<32x64x1xf32, #gpu.address_space<workgroup>>
1109
+ // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[LANEID]], %c0, %c0] {in_bounds = [true, true, true]} : vector<1x64x1xf32>, memref<32x64x1xf32, 3>
1110
1110
// CHECK-SCF-IF: %[[RID:.*]] = affine.apply #[[$TIMES2]]()[%[[LANEID]]]
1111
- // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[RID]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x128xf32>, memref<1x64x128xf32, #gpu.address_space<workgroup>>
1111
+ // CHECK-SCF-IF: vector.transfer_write %{{.*}}, %{{.*}}[%[[C0]], %[[RID]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x128xf32>, memref<1x64x128xf32, 3>
1112
1112
// CHECK-SCF-IF: gpu.barrier
1113
1113
1114
1114
// CHECK-SCF-IF: scf.if{{.*}}{
1115
1115
%r:2 = gpu.warp_execute_on_lane_0(%laneid)[32]
1116
1116
args(%v0, %v1 : vector<1x64x1xf32>, vector<1x2x128xf32>) -> (vector<1x64x1xf32>, vector<1x2x128xf32>) {
1117
1117
^bb0(%arg0: vector<32x64x1xf32>, %arg1: vector<1x64x128xf32>):
1118
1118
1119
- // CHECK-SCF-IF-DAG: %[[SR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<32x64x1xf32, #gpu.address_space<workgroup>>, vector<32x64x1xf32>
1120
- // CHECK-SCF-IF-DAG: %[[SR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<1x64x128xf32, #gpu.address_space<workgroup>>, vector<1x64x128xf32>
1119
+ // CHECK-SCF-IF-DAG: %[[SR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<32x64x1xf32, 3>, vector<32x64x1xf32>
1120
+ // CHECK-SCF-IF-DAG: %[[SR1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : memref<1x64x128xf32, 3>, vector<1x64x128xf32>
1121
1121
// CHECK-SCF-IF: %[[W0:.*]] = "some_def_0"(%[[SR0]]) : (vector<32x64x1xf32>) -> vector<32x64x1xf32>
1122
1122
// CHECK-SCF-IF: %[[W1:.*]] = "some_def_1"(%[[SR1]]) : (vector<1x64x128xf32>) -> vector<1x64x128xf32>
1123
- // CHECK-SCF-IF-DAG: vector.transfer_write %[[W0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<32x64x1xf32>, memref<32x64x1xf32, #gpu.address_space<workgroup>>
1124
- // CHECK-SCF-IF-DAG: vector.transfer_write %[[W1]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x64x128xf32>, memref<1x64x128xf32, #gpu.address_space<workgroup>>
1123
+ // CHECK-SCF-IF-DAG: vector.transfer_write %[[W0]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<32x64x1xf32>, memref<32x64x1xf32, 3>
1124
+ // CHECK-SCF-IF-DAG: vector.transfer_write %[[W1]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x64x128xf32>, memref<1x64x128xf32, 3>
1125
1125
1126
1126
%r0 = "some_def_0"(%arg0) : (vector<32x64x1xf32>) -> vector<32x64x1xf32>
1127
1127
%r1 = "some_def_1"(%arg1) : (vector<1x64x128xf32>) -> vector<1x64x128xf32>
@@ -1132,8 +1132,8 @@ func.func @warp_execute_nd_distribute(%laneid: index, %v0: vector<1x64x1xf32>, %
1132
1132
1133
1133
// CHECK-SCF-IF: gpu.barrier
1134
1134
// CHECK-SCF-IF: %[[WID:.*]] = affine.apply #[[$TIMES2]]()[%[[LANEID]]]
1135
- // CHECK-SCF-IF-DAG: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[LANEID]], %[[C0]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<32x64x1xf32, #gpu.address_space<workgroup>>, vector<1x64x1xf32>
1136
- // CHECK-SCF-IF-DAG: %[[R1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[WID]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<1x64x128xf32, #gpu.address_space<workgroup>>, vector<1x2x128xf32>
1135
+ // CHECK-SCF-IF-DAG: %[[R0:.*]] = vector.transfer_read %{{.*}}[%[[LANEID]], %[[C0]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<32x64x1xf32, 3>, vector<1x64x1xf32>
1136
+ // CHECK-SCF-IF-DAG: %[[R1:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[WID]], %[[C0]]], %cst {in_bounds = [true, true, true]} : memref<1x64x128xf32, 3>, vector<1x2x128xf32>
1137
1137
// CHECK-SCF-IF: return %[[R0]], %[[R1]] : vector<1x64x1xf32>, vector<1x2x128xf32>
1138
1138
return %r#0, %r#1 : vector<1x64x1xf32>, vector<1x2x128xf32>
1139
1139
}
0 commit comments