@@ -42,7 +42,7 @@ func.func @vector_transfer_read_i2() -> vector<3xi2> {
42
42
43
43
// -----
44
44
45
- func.func @vector_cst_maskedload_i2 (%passthru: vector <5 xi2 >) -> vector <3 x5 xi2 > {
45
+ func.func @vector_constant_mask_maskedload_i2 (%passthru: vector <5 xi2 >) -> vector <3 x5 xi2 > {
46
46
%0 = memref.alloc () : memref <3 x5 xi2 >
47
47
%cst = arith.constant dense <0 > : vector <3 x5 xi2 >
48
48
%mask = vector.constant_mask [3 ] : vector <5 xi1 >
@@ -54,7 +54,7 @@ func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> {
54
54
return %2 : vector <3 x5 xi2 >
55
55
}
56
56
57
- // CHECK-LABEL: func @vector_cst_maskedload_i2 (
57
+ // CHECK-LABEL: func @vector_constant_mask_maskedload_i2 (
58
58
// CHECK-SAME: %[[ARG0:.+]]: vector<5xi2>) -> vector<3x5xi2>
59
59
// CHECK: %[[ORIGINMASK:.+]] = vector.constant_mask [3] : vector<5xi1>
60
60
// CHECK: %[[NEWMASK:.+]] = arith.constant dense<true> : vector<2xi1>
@@ -74,6 +74,55 @@ func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> {
74
74
75
75
// -----
76
76
77
+ // This tests the correctness of generating a compressed mask with `vector.create_mask` on a statically shaped memref with a dynamic mask operand.
78
+ // Specifically, the program performs a masked load of a vector<5xi2> from `memref<3x5xi2>[1, 0]`, with a mask size `m` that is unknown at compile time.
79
+ // After the emulation transformation, it performs a masked load of 2 bytes from linearized index `memref<4xi8>[1]`, with a new compressed mask
80
+ // given by `ceildiv(m + 1, 4)`.
81
+ func.func @unaligned_create_mask_dynamic_i2 (%m : index , %passthru: vector <5 xi2 >) -> vector <5 xi2 > {
82
+ %0 = memref.alloc () : memref <3 x5 xi2 >
83
+ %c0 = arith.constant 0 : index
84
+ %c1 = arith.constant 1 : index
85
+ %mask = vector.create_mask %m : vector <5 xi1 > // mask size comes from the function argument %m
86
+ %1 = vector.maskedload %0 [%c1 , %c0 ], %mask , %passthru :
87
+ memref <3 x5 xi2 >, vector <5 xi1 >, vector <5 xi2 > into vector <5 xi2 >
88
+ return %1 : vector <5 xi2 >
89
+ }
90
+
91
+ // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> ((s0 + 1) ceildiv 4)>
92
+ // CHECK: func @unaligned_create_mask_dynamic_i2(
93
+ // CHECK-SAME: %[[NUM_ELEMS_TO_LOAD:.+]]: index, %[[PASSTHRU:.+]]: vector<5xi2>)
94
+ // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<4xi8>
95
+ // CHECK: %[[COMPRESSED_MASK:.+]] = affine.apply #[[MAP]]()[%[[NUM_ELEMS_TO_LOAD]]]
96
+ // CHECK: %[[NEW_MASK:.+]] = vector.create_mask %[[COMPRESSED_MASK]] : vector<2xi1>
97
+ // CHECK: %[[C1:.+]] = arith.constant 1 : index
98
+ // CHECK: vector.maskedload %[[ALLOC]][%[[C1]]], %[[NEW_MASK]]
99
+
100
+ // -----
101
+
102
+ // This tests the correctness of generating a compressed mask with `vector.create_mask` and a static mask operand.
103
+ // Quite the same as the previous test, but the mask generator is a static value.
104
+ // In this case, the desired slice `vector<7xi2>` spans 3 bytes.
105
+ func.func @check_unaligned_create_mask_static_i2 (%passthru: vector <7 xi2 >) -> vector <7 xi2 > {
106
+ %0 = memref.alloc () : memref <3 x7 xi2 >
107
+ %c0 = arith.constant 0 : index
108
+ %c1 = arith.constant 1 : index
109
+ %c3 = arith.constant 3 : index
110
+ %mask = vector.create_mask %c3 : vector <7 xi1 > // mask size is the constant %c3
111
+ %1 = vector.maskedload %0 [%c1 , %c0 ], %mask , %passthru :
112
+ memref <3 x7 xi2 >, vector <7 xi1 >, vector <7 xi2 > into vector <7 xi2 >
113
+ return %1 : vector <7 xi2 >
114
+ }
115
+
116
+ // CHECK: func @check_unaligned_create_mask_static_i2(
117
+ // CHECK-SAME: %[[PASSTHRU:[a-zA-Z0-9]+]]: vector<7xi2>)
118
+ // CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<6xi8>
119
+ // CHECK: %[[C2:.+]] = arith.constant 2 : index
120
+ // CHECK: %[[COMP_MASK:.+]] = vector.create_mask %[[C2]] : vector<3xi1>
121
+ // CHECK: %[[C1:.+]] = arith.constant 1 : index
122
+ // CHECK: %{{.+}} = vector.maskedload %[[ALLOC]][%[[C1]]], %[[COMP_MASK]]
123
+
124
+ // -----
125
+
77
126
func.func @vector_load_i2_dynamic_indexing (%idx1: index , %idx2: index ) -> vector <3 xi2 > {
78
127
%0 = memref.alloc () : memref <3 x3 xi2 >
79
128
%cst = arith.constant dense <0 > : vector <3 x3 xi2 >
0 commit comments