; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+ ; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=X86XOP

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.
@@ -75,6 +76,11 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X64-NEXT: psrlw $1, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_v2i16:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}
@@ -145,6 +151,14 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X64-NEXT: shrq %rdx
; X64-NEXT: leaq (%rdx,%rcx,2), %rax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_i64:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %eax
+ ; X86XOP-NEXT: vpextrd $1, %xmm0, %edx
+ ; X86XOP-NEXT: retl
  %b = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %b
}
@@ -195,6 +209,13 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X64-NEXT: shrl %eax
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_i32:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %eax
+ ; X86XOP-NEXT: retl
  %b = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %b
}
@@ -247,6 +268,14 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_i24:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %eax
+ ; X86XOP-NEXT: shrl $8, %eax
+ ; X86XOP-NEXT: retl
  %b = call i24 @llvm.bitreverse.i24(i24 %a)
  ret i24 %b
}
@@ -299,6 +328,14 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_i16:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %eax
+ ; X86XOP-NEXT: # kill: def $ax killed $ax killed $eax
+ ; X86XOP-NEXT: retl
  %b = call i16 @llvm.bitreverse.i16(i16 %a)
  ret i16 %b
}
@@ -342,6 +379,14 @@ define i8 @test_bitreverse_i8(i8 %a) {
; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_i8:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %eax
+ ; X86XOP-NEXT: # kill: def $al killed $al killed $eax
+ ; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  ret i8 %b
}
@@ -387,6 +432,15 @@ define i4 @test_bitreverse_i4(i4 %a) {
; X64-NEXT: shrb $4, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: test_bitreverse_i4:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+ ; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %eax
+ ; X86XOP-NEXT: shrb $4, %al
+ ; X86XOP-NEXT: # kill: def $al killed $al killed $eax
+ ; X86XOP-NEXT: retl
  %b = call i4 @llvm.bitreverse.i4(i4 %a)
  ret i4 %b
}
@@ -404,6 +458,11 @@ define <2 x i16> @fold_v2i16() {
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: fold_v2i16:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: vmovaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
+ ; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
  ret <2 x i16> %b
}
@@ -418,6 +477,11 @@ define i24 @fold_i24() {
; X64: # %bb.0:
; X64-NEXT: movl $2048, %eax # imm = 0x800
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: fold_i24:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: movl $2048, %eax # imm = 0x800
+ ; X86XOP-NEXT: retl
  %b = call i24 @llvm.bitreverse.i24(i24 4096)
  ret i24 %b
}
@@ -432,6 +496,11 @@ define i8 @fold_i8() {
; X64: # %bb.0:
; X64-NEXT: movb $-16, %al
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: fold_i8:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: movb $-16, %al
+ ; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 15)
  ret i8 %b
}
@@ -446,6 +515,11 @@ define i4 @fold_i4() {
; X64: # %bb.0:
; X64-NEXT: movb $1, %al
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: fold_i4:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: movb $1, %al
+ ; X86XOP-NEXT: retl
  %b = call i4 @llvm.bitreverse.i4(i4 8)
  ret i4 %b
}
@@ -463,6 +537,11 @@ define i8 @identity_i8(i8 %a) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: identity_i8:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: movb {{[0-9]+}}(%esp), %al
+ ; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  %c = call i8 @llvm.bitreverse.i8(i8 %b)
  ret i8 %c
@@ -478,6 +557,10 @@ define <2 x i16> @identity_v2i16(<2 x i16> %a) {
; X64-LABEL: identity_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: identity_v2i16:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
  ret <2 x i16> %c
@@ -493,6 +576,10 @@ define i8 @undef_i8() {
; X64-LABEL: undef_i8:
; X64: # %bb.0:
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: undef_i8:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: retl
  %b = call i8 @llvm.bitreverse.i8(i8 undef)
  ret i8 %b
}
@@ -505,6 +592,10 @@ define <2 x i16> @undef_v2i16() {
; X64-LABEL: undef_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: undef_v2i16:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: retl
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  ret <2 x i16> %b
}
@@ -1122,6 +1213,113 @@ define i528 @large_promotion(i528 %A) nounwind {
; X64-NEXT: popq %r15
; X64-NEXT: popq %rbp
; X64-NEXT: retq
+ ;
+ ; X86XOP-LABEL: large_promotion:
+ ; X86XOP: # %bb.0:
+ ; X86XOP-NEXT: pushl %ebp
+ ; X86XOP-NEXT: pushl %ebx
+ ; X86XOP-NEXT: pushl %edi
+ ; X86XOP-NEXT: pushl %esi
+ ; X86XOP-NEXT: subl $44, %esp
+ ; X86XOP-NEXT: vmovdqa {{.*#+}} xmm0 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88]
+ ; X86XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %ecx
+ ; X86XOP-NEXT: shrdl $16, %ecx, %eax
+ ; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+ ; X86XOP-NEXT: shrdl $16, %eax, %ecx
+ ; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %ecx
+ ; X86XOP-NEXT: shrdl $16, %ecx, %eax
+ ; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+ ; X86XOP-NEXT: shrdl $16, %eax, %ecx
+ ; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %ecx
+ ; X86XOP-NEXT: shrdl $16, %ecx, %eax
+ ; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+ ; X86XOP-NEXT: shrdl $16, %eax, %ecx
+ ; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %ecx
+ ; X86XOP-NEXT: shrdl $16, %ecx, %eax
+ ; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+ ; X86XOP-NEXT: shrdl $16, %eax, %ecx
+ ; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %ecx
+ ; X86XOP-NEXT: shrdl $16, %ecx, %eax
+ ; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
+ ; X86XOP-NEXT: shrdl $16, %eax, %ecx
+ ; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %ebp
+ ; X86XOP-NEXT: shrdl $16, %ebp, %eax
+ ; X86XOP-NEXT: movl %eax, (%esp) # 4-byte Spill
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %ebx
+ ; X86XOP-NEXT: shrdl $16, %ebx, %ebp
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
+ ; X86XOP-NEXT: vmovd %xmm1, %esi
+ ; X86XOP-NEXT: shrdl $16, %esi, %ebx
+ ; X86XOP-NEXT: vpextrd $1, %xmm1, %edx
+ ; X86XOP-NEXT: shrdl $16, %edx, %esi
+ ; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+ ; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm0
+ ; X86XOP-NEXT: vmovd %xmm0, %ecx
+ ; X86XOP-NEXT: shrdl $16, %ecx, %edx
+ ; X86XOP-NEXT: vpextrd $1, %xmm0, %edi
+ ; X86XOP-NEXT: shrdl $16, %edi, %ecx
+ ; X86XOP-NEXT: movl {{[0-9]+}}(%esp), %eax
+ ; X86XOP-NEXT: movl %ecx, 60(%eax)
+ ; X86XOP-NEXT: movl %edx, 56(%eax)
+ ; X86XOP-NEXT: movl %esi, 52(%eax)
+ ; X86XOP-NEXT: movl %ebx, 48(%eax)
+ ; X86XOP-NEXT: movl %ebp, 44(%eax)
+ ; X86XOP-NEXT: movl (%esp), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 40(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 36(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 32(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 28(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 24(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 20(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 16(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 12(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 8(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, 4(%eax)
+ ; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+ ; X86XOP-NEXT: movl %ecx, (%eax)
+ ; X86XOP-NEXT: shrl $16, %edi
+ ; X86XOP-NEXT: movw %di, 64(%eax)
+ ; X86XOP-NEXT: addl $44, %esp
+ ; X86XOP-NEXT: popl %esi
+ ; X86XOP-NEXT: popl %edi
+ ; X86XOP-NEXT: popl %ebx
+ ; X86XOP-NEXT: popl %ebp
+ ; X86XOP-NEXT: retl $4
  %Z = call i528 @llvm.bitreverse.i528(i528 %A)
  ret i528 %Z
}
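For context, the new RUN line exercises the XOP path, where @llvm.bitreverse lowers to a single VPPERM: the control-vector bytes (the [87,86,...,88] constant in the large_promotion checks above) select the source bytes in reversed order using VPPERM's per-byte bit-reverse operation, so one instruction reverses every bit of each element. A minimal standalone reproduction is sketched below; the file name and function name are illustrative, not part of this patch.

; bitreverse-xop-sketch.ll (hypothetical): run with
;   llc < %s -mtriple=i686-unknown -mattr=+xop
; and the i32 case should compile to a short vmovd/vpperm/vmovd
; sequence like the X86XOP checks added above.
declare i32 @llvm.bitreverse.i32(i32)

define i32 @rev32(i32 %x) {
  %r = call i32 @llvm.bitreverse.i32(i32 %x)
  ret i32 %r
}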