1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
- ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK,CHECK-LE %s
3
- ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK,CHECK-LE %s
4
- ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK,CHECK-BE %s
2
+ ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE %s
3
+ ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE-SVE %s
4
+ ; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK-BE %s
5
5
6
6
define <2 x i64 > @scalarize_v2i64 (ptr %p , <2 x i1 > %mask , <2 x i64 > %passthru ) {
7
7
; CHECK-LE-LABEL: @scalarize_v2i64(
@@ -28,6 +28,10 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
28
28
; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
29
29
; CHECK-LE-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
30
30
;
31
+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64(
32
+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 128, <2 x i1> [[MASK:%.*]], <2 x i64> [[PASSTHRU:%.*]])
33
+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
34
+ ;
31
35
; CHECK-BE-LABEL: @scalarize_v2i64(
32
36
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
33
37
; CHECK-BE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], -2
@@ -57,58 +61,83 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
57
61
}
58
62
59
63
define <2 x i64 > @scalarize_v2i64_ones_mask (ptr %p , <2 x i64 > %passthru ) {
60
- ; CHECK-LABEL: @scalarize_v2i64_ones_mask(
61
- ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
62
- ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
64
+ ; CHECK-LE-LABEL: @scalarize_v2i64_ones_mask(
65
+ ; CHECK-LE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
66
+ ; CHECK-LE-NEXT: ret <2 x i64> [[TMP1]]
67
+ ;
68
+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_ones_mask(
69
+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
70
+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
71
+ ;
72
+ ; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask(
73
+ ; CHECK-BE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
74
+ ; CHECK-BE-NEXT: ret <2 x i64> [[TMP1]]
63
75
;
64
76
%ret = call <2 x i64 > @llvm.masked.load.v2i64.p0 (ptr %p , i32 8 , <2 x i1 > <i1 true , i1 true >, <2 x i64 > %passthru )
65
77
ret <2 x i64 > %ret
66
78
}
67
79
68
80
define <2 x i64 > @scalarize_v2i64_zero_mask (ptr %p , <2 x i64 > %passthru ) {
69
- ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
70
- ; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
81
+ ; CHECK-LE-LABEL: @scalarize_v2i64_zero_mask(
82
+ ; CHECK-LE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
83
+ ;
84
+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_zero_mask(
85
+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> zeroinitializer, <2 x i64> [[PASSTHRU:%.*]])
86
+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
87
+ ;
88
+ ; CHECK-BE-LABEL: @scalarize_v2i64_zero_mask(
89
+ ; CHECK-BE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
71
90
;
72
91
%ret = call <2 x i64 > @llvm.masked.load.v2i64.p0 (ptr %p , i32 8 , <2 x i1 > <i1 false , i1 false >, <2 x i64 > %passthru )
73
92
ret <2 x i64 > %ret
74
93
}
75
94
76
95
define <2 x i64 > @scalarize_v2i64_const_mask (ptr %p , <2 x i64 > %passthru ) {
77
- ; CHECK-LABEL: @scalarize_v2i64_const_mask(
78
- ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
79
- ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
80
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
81
- ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
96
+ ; CHECK-LE-LABEL: @scalarize_v2i64_const_mask(
97
+ ; CHECK-LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
98
+ ; CHECK-LE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
99
+ ; CHECK-LE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
100
+ ; CHECK-LE-NEXT: ret <2 x i64> [[TMP3]]
101
+ ;
102
+ ; CHECK-LE-SVE-LABEL: @scalarize_v2i64_const_mask(
103
+ ; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
104
+ ; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
105
+ ;
106
+ ; CHECK-BE-LABEL: @scalarize_v2i64_const_mask(
107
+ ; CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
108
+ ; CHECK-BE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
109
+ ; CHECK-BE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
110
+ ; CHECK-BE-NEXT: ret <2 x i64> [[TMP3]]
82
111
;
83
112
%ret = call <2 x i64 > @llvm.masked.load.v2i64.p0 (ptr %p , i32 8 , <2 x i1 > <i1 false , i1 true >, <2 x i64 > %passthru )
84
113
ret <2 x i64 > %ret
85
114
}
86
115
87
116
; This uses a byte-sized but non-power-of-2 element size. This used to crash due to a bad alignment calculation.
88
117
define <2 x i24 > @scalarize_v2i24 (ptr %p , <2 x i1 > %mask , <2 x i24 > %passthru ) {
89
- ; CHECK-LE-LABEL: @scalarize_v2i24(
90
- ; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
91
- ; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
92
- ; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
93
- ; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
94
- ; CHECK-LE: cond.load:
95
- ; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
96
- ; CHECK-LE-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
97
- ; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
98
- ; CHECK-LE-NEXT: br label [[ELSE]]
99
- ; CHECK-LE: else:
100
- ; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
101
- ; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
102
- ; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
103
- ; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
104
- ; CHECK-LE: cond.load1:
105
- ; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
106
- ; CHECK-LE-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
107
- ; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
108
- ; CHECK-LE-NEXT: br label [[ELSE2]]
109
- ; CHECK-LE: else2:
110
- ; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
111
- ; CHECK-LE-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
118
+ ; CHECK-LE-COMMON-LABEL: @scalarize_v2i24(
119
+ ; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
120
+ ; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
121
+ ; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
122
+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
123
+ ; CHECK-LE-COMMON: cond.load:
124
+ ; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
125
+ ; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
126
+ ; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
127
+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
128
+ ; CHECK-LE-COMMON: else:
129
+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
130
+ ; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
131
+ ; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
132
+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
133
+ ; CHECK-LE-COMMON: cond.load1:
134
+ ; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
135
+ ; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
136
+ ; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
137
+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
138
+ ; CHECK-LE-COMMON: else2:
139
+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
140
+ ; CHECK-LE-COMMON-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
112
141
;
113
142
; CHECK-BE-LABEL: @scalarize_v2i24(
114
143
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
@@ -140,29 +169,29 @@ define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
140
169
141
170
; This uses a byte-sized but non-power-of-2 element size. This used to crash due to a bad alignment calculation.
142
171
define <2 x i48 > @scalarize_v2i48 (ptr %p , <2 x i1 > %mask , <2 x i48 > %passthru ) {
143
- ; CHECK-LE-LABEL: @scalarize_v2i48(
144
- ; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
145
- ; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
146
- ; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
147
- ; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
148
- ; CHECK-LE: cond.load:
149
- ; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
150
- ; CHECK-LE-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
151
- ; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
152
- ; CHECK-LE-NEXT: br label [[ELSE]]
153
- ; CHECK-LE: else:
154
- ; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
155
- ; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
156
- ; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
157
- ; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
158
- ; CHECK-LE: cond.load1:
159
- ; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
160
- ; CHECK-LE-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
161
- ; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
162
- ; CHECK-LE-NEXT: br label [[ELSE2]]
163
- ; CHECK-LE: else2:
164
- ; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
165
- ; CHECK-LE-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
172
+ ; CHECK-LE-COMMON-LABEL: @scalarize_v2i48(
173
+ ; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
174
+ ; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
175
+ ; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
176
+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
177
+ ; CHECK-LE-COMMON: cond.load:
178
+ ; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
179
+ ; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
180
+ ; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
181
+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
182
+ ; CHECK-LE-COMMON: else:
183
+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
184
+ ; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
185
+ ; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
186
+ ; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
187
+ ; CHECK-LE-COMMON: cond.load1:
188
+ ; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
189
+ ; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
190
+ ; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
191
+ ; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
192
+ ; CHECK-LE-COMMON: else2:
193
+ ; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
194
+ ; CHECK-LE-COMMON-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
166
195
;
167
196
; CHECK-BE-LABEL: @scalarize_v2i48(
168
197
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
0 commit comments