@@ -13,6 +13,17 @@ define i32 @load_extract_idx_0(ptr %x) {
13
13
ret i32 %r
14
14
}
15
15
16
+ define i32 @vscale_load_extract_idx_0 (ptr %x ) {
17
+ ; CHECK-LABEL: @vscale_load_extract_idx_0(
18
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
19
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 0
20
+ ; CHECK-NEXT: ret i32 [[R]]
21
+ ; Scalable-vector case: load a <vscale x 4 x i32> from %x and return lane 0. The expected output above keeps the whole-vector load followed by the extractelement, and spells out align 16 on the load, which the input leaves implicit.
22
+ %lv = load <vscale x 4 x i32 >, ptr %x ; no explicit alignment on the input load
23
+ %r = extractelement <vscale x 4 x i32 > %lv , i32 0 ; constant index 0, inside the minimum 4 lanes of the type
24
+ ret i32 %r
25
+ }
26
+
16
27
; If the original load had a smaller alignment than the scalar type, the
17
28
; smaller alignment should be used.
18
29
define i32 @load_extract_idx_0_small_alignment (ptr %x ) {
@@ -48,6 +59,17 @@ define i32 @load_extract_idx_2(ptr %x) {
48
59
ret i32 %r
49
60
}
50
61
62
+ define i32 @vscale_load_extract_idx_2 (ptr %x ) {
63
+ ; CHECK-LABEL: @vscale_load_extract_idx_2(
64
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
65
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 2
66
+ ; CHECK-NEXT: ret i32 [[R]]
67
+ ; Scalable-vector case: load a <vscale x 4 x i32> from %x and return lane 2. The expected output above keeps the whole-vector load followed by the extractelement.
68
+ %lv = load <vscale x 4 x i32 >, ptr %x ; no explicit alignment on the input load
69
+ %r = extractelement <vscale x 4 x i32 > %lv , i32 2 ; constant index 2, still inside the minimum 4 lanes of the type
70
+ ret i32 %r
71
+ }
72
+
51
73
define i32 @load_extract_idx_3 (ptr %x ) {
52
74
; CHECK-LABEL: @load_extract_idx_3(
53
75
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
@@ -72,6 +94,17 @@ define i32 @load_extract_idx_4(ptr %x) {
72
94
ret i32 %r
73
95
}
74
96
97
+ define i32 @vscale_load_extract_idx_4 (ptr %x ) {
98
+ ; CHECK-LABEL: @vscale_load_extract_idx_4(
99
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
100
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 4
101
+ ; CHECK-NEXT: ret i32 [[R]]
102
+ ; Scalable-vector case: load a <vscale x 4 x i32> from %x and return lane 4. The expected output above keeps the whole-vector load followed by the extractelement.
103
+ %lv = load <vscale x 4 x i32 >, ptr %x ; no explicit alignment on the input load
104
+ %r = extractelement <vscale x 4 x i32 > %lv , i32 4 ; constant index 4 is past the minimum 4 lanes (0..3), so it is only in range when vscale is at least 2
105
+ ret i32 %r
106
+ }
107
+
75
108
define i32 @load_extract_idx_var_i64 (ptr %x , i64 %idx ) {
76
109
; CHECK-LABEL: @load_extract_idx_var_i64(
77
110
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
@@ -104,6 +137,25 @@ entry:
104
137
ret i32 %r
105
138
}
106
139
140
+ define i32 @vscale_load_extract_idx_var_i64_known_valid_by_assume (ptr %x , i64 %idx ) {
141
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_assume(
142
+ ; CHECK-NEXT: entry:
143
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
144
+ ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
145
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
146
+ ; CHECK-NEXT: call void @maythrow()
147
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
148
+ ; CHECK-NEXT: ret i32 [[R]]
149
+ ; Scalable-vector case with a variable index that an assume bounds below 4, the minimum lane count of <vscale x 4 x i32>. The expected output above keeps every instruction in its original order, including the whole-vector load.
150
+ entry:
151
+ %cmp = icmp ult i64 %idx , 4 ; unsigned compare, so %idx is constrained to 0..3
152
+ call void @llvm.assume (i1 %cmp )
153
+ %lv = load <vscale x 4 x i32 >, ptr %x
154
+ call void @maythrow () ; call placed between the load and its only use
155
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx
156
+ ret i32 %r
157
+ }
158
+
107
159
declare i1 @cond ()
108
160
109
161
define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block (ptr %x , i64 %idx , i1 %c.1 ) {
@@ -213,6 +265,45 @@ entry:
213
265
ret i32 %r
214
266
}
215
267
268
+ define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0 (ptr %x , i64 %idx ) {
269
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(
270
+ ; CHECK-NEXT: entry:
271
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5
272
+ ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
273
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
274
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
275
+ ; CHECK-NEXT: ret i32 [[R]]
276
+ ; Scalable-vector case where the assume is too weak: it only bounds %idx below 5, so index 4 (past the minimum 4 lanes) is still possible. The expected output above keeps the whole-vector load followed by the extractelement.
277
+ entry:
278
+ %cmp = icmp ult i64 %idx , 5 ; allows %idx in 0..4
279
+ call void @llvm.assume (i1 %cmp )
280
+ %lv = load <vscale x 4 x i32 >, ptr %x
281
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx
282
+ ret i32 %r
283
+ }
284
+
285
+ define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1 (ptr %x , i64 %idx ) {
286
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(
287
+ ; CHECK-NEXT: entry:
288
+ ; CHECK-NEXT: [[VS:%.*]] = call i64 @llvm.vscale.i64()
289
+ ; CHECK-NEXT: [[VM:%.*]] = mul i64 [[VS]], 4
290
+ ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], [[VM]]
291
+ ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
292
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
293
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
294
+ ; CHECK-NEXT: ret i32 [[R]]
295
+ ; Scalable-vector case where the assume bounds %idx by a runtime value (vscale * 4) instead of a constant. The expected output above keeps every instruction unchanged. NOTE(review): the name says "not known valid" although the bound matches the runtime lane count; presumably the transform does not reason about vscale-based bounds - confirm.
296
+ entry:
297
+ %vs = call i64 @llvm.vscale.i64 ()
298
+ %vm = mul i64 %vs , 4 ; vscale * 4
299
+ %cmp = icmp ult i64 %idx , %vm ; bound is not a compile-time constant
300
+ call void @llvm.assume (i1 %cmp )
301
+ %lv = load <vscale x 4 x i32 >, ptr %x
302
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx
303
+ ret i32 %r
304
+ }
305
+
306
+ declare i64 @llvm.vscale.i64 () ; vscale intrinsic; called by @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1 to build its vscale * 4 bound
216
307
declare void @llvm.assume (i1 ) ; assume intrinsic; the *_by_assume tests call it to constrain %idx
217
308
218
309
define i32 @load_extract_idx_var_i64_known_valid_by_and (ptr %x , i64 %idx ) {
@@ -230,6 +321,21 @@ entry:
230
321
ret i32 %r
231
322
}
232
323
324
+ define i32 @vscale_load_extract_idx_var_i64_known_valid_by_and (ptr %x , i64 %idx ) {
325
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_and(
326
+ ; CHECK-NEXT: entry:
327
+ ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3
328
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
329
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
330
+ ; CHECK-NEXT: ret i32 [[R]]
331
+ ; Scalable-vector case with a variable index masked to 0..3, the minimum lane range of <vscale x 4 x i32>. %idx carries no noundef attribute here. The expected output above keeps the whole-vector load followed by the extractelement.
332
+ entry:
333
+ %idx.clamped = and i64 %idx , 3 ; keeps only the low two bits, so the result is 0..3
334
+ %lv = load <vscale x 4 x i32 >, ptr %x
335
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx.clamped
336
+ ret i32 %r
337
+ }
338
+
233
339
define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef (ptr %x , i64 noundef %idx ) {
234
340
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef(
235
341
; CHECK-NEXT: entry:
@@ -260,6 +366,21 @@ entry:
260
366
ret i32 %r
261
367
}
262
368
369
+ define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_and (ptr %x , i64 %idx ) {
370
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_and(
371
+ ; CHECK-NEXT: entry:
372
+ ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4
373
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
374
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
375
+ ; CHECK-NEXT: ret i32 [[R]]
376
+ ; Scalable-vector case where the mask does not confine the index to the minimum lane range: and-with-4 yields 0 or 4, and 4 is past lanes 0..3. The expected output above keeps the whole-vector load followed by the extractelement.
377
+ entry:
378
+ %idx.clamped = and i64 %idx , 4 ; result is either 0 or 4
379
+ %lv = load <vscale x 4 x i32 >, ptr %x
380
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx.clamped
381
+ ret i32 %r
382
+ }
383
+
263
384
define i32 @load_extract_idx_var_i64_known_valid_by_urem (ptr %x , i64 %idx ) {
264
385
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem(
265
386
; CHECK-NEXT: entry:
@@ -275,6 +396,21 @@ entry:
275
396
ret i32 %r
276
397
}
277
398
399
+ define i32 @vscale_load_extract_idx_var_i64_known_valid_by_urem (ptr %x , i64 %idx ) {
400
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_urem(
401
+ ; CHECK-NEXT: entry:
402
+ ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4
403
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
404
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
405
+ ; CHECK-NEXT: ret i32 [[R]]
406
+ ; Scalable-vector case with a variable index reduced modulo 4, giving 0..3, the minimum lane range of <vscale x 4 x i32>. %idx carries no noundef attribute here. The expected output above keeps the whole-vector load followed by the extractelement.
407
+ entry:
408
+ %idx.clamped = urem i64 %idx , 4 ; unsigned remainder, so the result is 0..3
409
+ %lv = load <vscale x 4 x i32 >, ptr %x
410
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx.clamped
411
+ ret i32 %r
412
+ }
413
+
278
414
define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef (ptr %x , i64 noundef %idx ) {
279
415
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef(
280
416
; CHECK-NEXT: entry:
@@ -305,6 +441,21 @@ entry:
305
441
ret i32 %r
306
442
}
307
443
444
+ define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_urem (ptr %x , i64 %idx ) {
445
+ ; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(
446
+ ; CHECK-NEXT: entry:
447
+ ; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5
448
+ ; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
449
+ ; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
450
+ ; CHECK-NEXT: ret i32 [[R]]
451
+ ; Scalable-vector case where the remainder does not confine the index to the minimum lane range: modulo 5 yields 0..4, and 4 is past lanes 0..3. The expected output above keeps the whole-vector load followed by the extractelement.
452
+ entry:
453
+ %idx.clamped = urem i64 %idx , 5 ; unsigned remainder, so the result is 0..4
454
+ %lv = load <vscale x 4 x i32 >, ptr %x
455
+ %r = extractelement <vscale x 4 x i32 > %lv , i64 %idx.clamped
456
+ ret i32 %r
457
+ }
458
+
308
459
define i32 @load_extract_idx_var_i32 (ptr %x , i32 %idx ) {
309
460
; CHECK-LABEL: @load_extract_idx_var_i32(
310
461
; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
0 commit comments