Skip to content

Commit 57c0c4a

Browse files
committed
[X86] Fix crash with i64 bitreverse on 32-bit targets with XOP.
We unconditionally marked i64 as Custom, but did not install a handler in ReplaceNodeResults for when i64 isn't a legal type. This leads to ReplaceNodeResults asserting. We have two options to fix this: either mark i64 as Custom only on 64-bit targets and let it expand to two i32 bitreverses, each of which needs a VPPERM; or add the Custom handling to ReplaceNodeResults. This patch takes the latter approach.
1 parent 918e343 commit 57c0c4a

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30730,6 +30730,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3073030730
Results.push_back(V);
3073130731
return;
3073230732
}
30733+
case ISD::BITREVERSE:
30734+
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
30735+
assert(Subtarget.hasXOP() && "Expected XOP");
30736+
// We can use VPPERM by copying to a vector register and back. We'll need
30737+
// to move the scalar in two i32 pieces.
30738+
Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG));
30739+
return;
3073330740
}
3073430741
}
3073530742

llvm/test/CodeGen/X86/bitreverse.ll

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
33
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
4+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=X86XOP
45

56
; These tests just check that the plumbing is in place for @llvm.bitreverse. The
67
; actual output is massive at the moment as llvm.bitreverse is not yet legal.
@@ -75,6 +76,11 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
7576
; X64-NEXT: psrlw $1, %xmm0
7677
; X64-NEXT: por %xmm1, %xmm0
7778
; X64-NEXT: retq
79+
;
80+
; X86XOP-LABEL: test_bitreverse_v2i16:
81+
; X86XOP: # %bb.0:
82+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
83+
; X86XOP-NEXT: retl
7884
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
7985
ret <2 x i16> %b
8086
}
@@ -145,6 +151,14 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
145151
; X64-NEXT: shrq %rdx
146152
; X64-NEXT: leaq (%rdx,%rcx,2), %rax
147153
; X64-NEXT: retq
154+
;
155+
; X86XOP-LABEL: test_bitreverse_i64:
156+
; X86XOP: # %bb.0:
157+
; X86XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
158+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
159+
; X86XOP-NEXT: vmovd %xmm0, %eax
160+
; X86XOP-NEXT: vpextrd $1, %xmm0, %edx
161+
; X86XOP-NEXT: retl
148162
%b = call i64 @llvm.bitreverse.i64(i64 %a)
149163
ret i64 %b
150164
}
@@ -195,6 +209,13 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
195209
; X64-NEXT: shrl %eax
196210
; X64-NEXT: leal (%rax,%rcx,2), %eax
197211
; X64-NEXT: retq
212+
;
213+
; X86XOP-LABEL: test_bitreverse_i32:
214+
; X86XOP: # %bb.0:
215+
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
216+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
217+
; X86XOP-NEXT: vmovd %xmm0, %eax
218+
; X86XOP-NEXT: retl
198219
%b = call i32 @llvm.bitreverse.i32(i32 %a)
199220
ret i32 %b
200221
}
@@ -247,6 +268,14 @@ define i24 @test_bitreverse_i24(i24 %a) nounwind {
247268
; X64-NEXT: leal (%rax,%rcx,2), %eax
248269
; X64-NEXT: shrl $8, %eax
249270
; X64-NEXT: retq
271+
;
272+
; X86XOP-LABEL: test_bitreverse_i24:
273+
; X86XOP: # %bb.0:
274+
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
275+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
276+
; X86XOP-NEXT: vmovd %xmm0, %eax
277+
; X86XOP-NEXT: shrl $8, %eax
278+
; X86XOP-NEXT: retl
250279
%b = call i24 @llvm.bitreverse.i24(i24 %a)
251280
ret i24 %b
252281
}
@@ -299,6 +328,14 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
299328
; X64-NEXT: leal (%rax,%rcx,2), %eax
300329
; X64-NEXT: # kill: def $ax killed $ax killed $eax
301330
; X64-NEXT: retq
331+
;
332+
; X86XOP-LABEL: test_bitreverse_i16:
333+
; X86XOP: # %bb.0:
334+
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
335+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
336+
; X86XOP-NEXT: vmovd %xmm0, %eax
337+
; X86XOP-NEXT: # kill: def $ax killed $ax killed $eax
338+
; X86XOP-NEXT: retl
302339
%b = call i16 @llvm.bitreverse.i16(i16 %a)
303340
ret i16 %b
304341
}
@@ -342,6 +379,14 @@ define i8 @test_bitreverse_i8(i8 %a) {
342379
; X64-NEXT: addl %edi, %eax
343380
; X64-NEXT: # kill: def $al killed $al killed $eax
344381
; X64-NEXT: retq
382+
;
383+
; X86XOP-LABEL: test_bitreverse_i8:
384+
; X86XOP: # %bb.0:
385+
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
386+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
387+
; X86XOP-NEXT: vmovd %xmm0, %eax
388+
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
389+
; X86XOP-NEXT: retl
345390
%b = call i8 @llvm.bitreverse.i8(i8 %a)
346391
ret i8 %b
347392
}
@@ -387,6 +432,15 @@ define i4 @test_bitreverse_i4(i4 %a) {
387432
; X64-NEXT: shrb $4, %al
388433
; X64-NEXT: # kill: def $al killed $al killed $eax
389434
; X64-NEXT: retq
435+
;
436+
; X86XOP-LABEL: test_bitreverse_i4:
437+
; X86XOP: # %bb.0:
438+
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
439+
; X86XOP-NEXT: vpperm {{\.LCPI.*}}, %xmm0, %xmm0, %xmm0
440+
; X86XOP-NEXT: vmovd %xmm0, %eax
441+
; X86XOP-NEXT: shrb $4, %al
442+
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
443+
; X86XOP-NEXT: retl
390444
%b = call i4 @llvm.bitreverse.i4(i4 %a)
391445
ret i4 %b
392446
}
@@ -404,6 +458,11 @@ define <2 x i16> @fold_v2i16() {
404458
; X64: # %bb.0:
405459
; X64-NEXT: movaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
406460
; X64-NEXT: retq
461+
;
462+
; X86XOP-LABEL: fold_v2i16:
463+
; X86XOP: # %bb.0:
464+
; X86XOP-NEXT: vmovaps {{.*#+}} xmm0 = <61440,240,u,u,u,u,u,u>
465+
; X86XOP-NEXT: retl
407466
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
408467
ret <2 x i16> %b
409468
}
@@ -418,6 +477,11 @@ define i24 @fold_i24() {
418477
; X64: # %bb.0:
419478
; X64-NEXT: movl $2048, %eax # imm = 0x800
420479
; X64-NEXT: retq
480+
;
481+
; X86XOP-LABEL: fold_i24:
482+
; X86XOP: # %bb.0:
483+
; X86XOP-NEXT: movl $2048, %eax # imm = 0x800
484+
; X86XOP-NEXT: retl
421485
%b = call i24 @llvm.bitreverse.i24(i24 4096)
422486
ret i24 %b
423487
}
@@ -432,6 +496,11 @@ define i8 @fold_i8() {
432496
; X64: # %bb.0:
433497
; X64-NEXT: movb $-16, %al
434498
; X64-NEXT: retq
499+
;
500+
; X86XOP-LABEL: fold_i8:
501+
; X86XOP: # %bb.0:
502+
; X86XOP-NEXT: movb $-16, %al
503+
; X86XOP-NEXT: retl
435504
%b = call i8 @llvm.bitreverse.i8(i8 15)
436505
ret i8 %b
437506
}
@@ -446,6 +515,11 @@ define i4 @fold_i4() {
446515
; X64: # %bb.0:
447516
; X64-NEXT: movb $1, %al
448517
; X64-NEXT: retq
518+
;
519+
; X86XOP-LABEL: fold_i4:
520+
; X86XOP: # %bb.0:
521+
; X86XOP-NEXT: movb $1, %al
522+
; X86XOP-NEXT: retl
449523
%b = call i4 @llvm.bitreverse.i4(i4 8)
450524
ret i4 %b
451525
}
@@ -463,6 +537,11 @@ define i8 @identity_i8(i8 %a) {
463537
; X64-NEXT: movl %edi, %eax
464538
; X64-NEXT: # kill: def $al killed $al killed $eax
465539
; X64-NEXT: retq
540+
;
541+
; X86XOP-LABEL: identity_i8:
542+
; X86XOP: # %bb.0:
543+
; X86XOP-NEXT: movb {{[0-9]+}}(%esp), %al
544+
; X86XOP-NEXT: retl
466545
%b = call i8 @llvm.bitreverse.i8(i8 %a)
467546
%c = call i8 @llvm.bitreverse.i8(i8 %b)
468547
ret i8 %c
@@ -478,6 +557,10 @@ define <2 x i16> @identity_v2i16(<2 x i16> %a) {
478557
; X64-LABEL: identity_v2i16:
479558
; X64: # %bb.0:
480559
; X64-NEXT: retq
560+
;
561+
; X86XOP-LABEL: identity_v2i16:
562+
; X86XOP: # %bb.0:
563+
; X86XOP-NEXT: retl
481564
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
482565
%c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
483566
ret <2 x i16> %c
@@ -493,6 +576,10 @@ define i8 @undef_i8() {
493576
; X64-LABEL: undef_i8:
494577
; X64: # %bb.0:
495578
; X64-NEXT: retq
579+
;
580+
; X86XOP-LABEL: undef_i8:
581+
; X86XOP: # %bb.0:
582+
; X86XOP-NEXT: retl
496583
%b = call i8 @llvm.bitreverse.i8(i8 undef)
497584
ret i8 %b
498585
}
@@ -505,6 +592,10 @@ define <2 x i16> @undef_v2i16() {
505592
; X64-LABEL: undef_v2i16:
506593
; X64: # %bb.0:
507594
; X64-NEXT: retq
595+
;
596+
; X86XOP-LABEL: undef_v2i16:
597+
; X86XOP: # %bb.0:
598+
; X86XOP-NEXT: retl
508599
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
509600
ret <2 x i16> %b
510601
}
@@ -1122,6 +1213,113 @@ define i528 @large_promotion(i528 %A) nounwind {
11221213
; X64-NEXT: popq %r15
11231214
; X64-NEXT: popq %rbp
11241215
; X64-NEXT: retq
1216+
;
1217+
; X86XOP-LABEL: large_promotion:
1218+
; X86XOP: # %bb.0:
1219+
; X86XOP-NEXT: pushl %ebp
1220+
; X86XOP-NEXT: pushl %ebx
1221+
; X86XOP-NEXT: pushl %edi
1222+
; X86XOP-NEXT: pushl %esi
1223+
; X86XOP-NEXT: subl $44, %esp
1224+
; X86XOP-NEXT: vmovdqa {{.*#+}} xmm0 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88]
1225+
; X86XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
1226+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1227+
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
1228+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1229+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1230+
; X86XOP-NEXT: vmovd %xmm1, %ecx
1231+
; X86XOP-NEXT: shrdl $16, %ecx, %eax
1232+
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1233+
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
1234+
; X86XOP-NEXT: shrdl $16, %eax, %ecx
1235+
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1236+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1237+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1238+
; X86XOP-NEXT: vmovd %xmm1, %ecx
1239+
; X86XOP-NEXT: shrdl $16, %ecx, %eax
1240+
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1241+
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
1242+
; X86XOP-NEXT: shrdl $16, %eax, %ecx
1243+
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1244+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1245+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1246+
; X86XOP-NEXT: vmovd %xmm1, %ecx
1247+
; X86XOP-NEXT: shrdl $16, %ecx, %eax
1248+
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1249+
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
1250+
; X86XOP-NEXT: shrdl $16, %eax, %ecx
1251+
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1252+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1253+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1254+
; X86XOP-NEXT: vmovd %xmm1, %ecx
1255+
; X86XOP-NEXT: shrdl $16, %ecx, %eax
1256+
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1257+
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
1258+
; X86XOP-NEXT: shrdl $16, %eax, %ecx
1259+
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1260+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1261+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1262+
; X86XOP-NEXT: vmovd %xmm1, %ecx
1263+
; X86XOP-NEXT: shrdl $16, %ecx, %eax
1264+
; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1265+
; X86XOP-NEXT: vpextrd $1, %xmm1, %eax
1266+
; X86XOP-NEXT: shrdl $16, %eax, %ecx
1267+
; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1268+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1269+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1270+
; X86XOP-NEXT: vmovd %xmm1, %ebp
1271+
; X86XOP-NEXT: shrdl $16, %ebp, %eax
1272+
; X86XOP-NEXT: movl %eax, (%esp) # 4-byte Spill
1273+
; X86XOP-NEXT: vpextrd $1, %xmm1, %ebx
1274+
; X86XOP-NEXT: shrdl $16, %ebx, %ebp
1275+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1276+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1
1277+
; X86XOP-NEXT: vmovd %xmm1, %esi
1278+
; X86XOP-NEXT: shrdl $16, %esi, %ebx
1279+
; X86XOP-NEXT: vpextrd $1, %xmm1, %edx
1280+
; X86XOP-NEXT: shrdl $16, %edx, %esi
1281+
; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
1282+
; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm0
1283+
; X86XOP-NEXT: vmovd %xmm0, %ecx
1284+
; X86XOP-NEXT: shrdl $16, %ecx, %edx
1285+
; X86XOP-NEXT: vpextrd $1, %xmm0, %edi
1286+
; X86XOP-NEXT: shrdl $16, %edi, %ecx
1287+
; X86XOP-NEXT: movl {{[0-9]+}}(%esp), %eax
1288+
; X86XOP-NEXT: movl %ecx, 60(%eax)
1289+
; X86XOP-NEXT: movl %edx, 56(%eax)
1290+
; X86XOP-NEXT: movl %esi, 52(%eax)
1291+
; X86XOP-NEXT: movl %ebx, 48(%eax)
1292+
; X86XOP-NEXT: movl %ebp, 44(%eax)
1293+
; X86XOP-NEXT: movl (%esp), %ecx # 4-byte Reload
1294+
; X86XOP-NEXT: movl %ecx, 40(%eax)
1295+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1296+
; X86XOP-NEXT: movl %ecx, 36(%eax)
1297+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1298+
; X86XOP-NEXT: movl %ecx, 32(%eax)
1299+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1300+
; X86XOP-NEXT: movl %ecx, 28(%eax)
1301+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1302+
; X86XOP-NEXT: movl %ecx, 24(%eax)
1303+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1304+
; X86XOP-NEXT: movl %ecx, 20(%eax)
1305+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1306+
; X86XOP-NEXT: movl %ecx, 16(%eax)
1307+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1308+
; X86XOP-NEXT: movl %ecx, 12(%eax)
1309+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1310+
; X86XOP-NEXT: movl %ecx, 8(%eax)
1311+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1312+
; X86XOP-NEXT: movl %ecx, 4(%eax)
1313+
; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1314+
; X86XOP-NEXT: movl %ecx, (%eax)
1315+
; X86XOP-NEXT: shrl $16, %edi
1316+
; X86XOP-NEXT: movw %di, 64(%eax)
1317+
; X86XOP-NEXT: addl $44, %esp
1318+
; X86XOP-NEXT: popl %esi
1319+
; X86XOP-NEXT: popl %edi
1320+
; X86XOP-NEXT: popl %ebx
1321+
; X86XOP-NEXT: popl %ebp
1322+
; X86XOP-NEXT: retl $4
11251323
%Z = call i528 @llvm.bitreverse.i528(i528 %A)
11261324
ret i528 %Z
11271325
}

0 commit comments

Comments
 (0)