Skip to content

Commit 5368c10

Browse files
committed
[X86][FastISel] Handle CRC32 intrinsics
Some applications make heavy use of the crc32 operation (e.g., as part of a hash function), so having a FastISel path avoids fallbacks to SelectionDAG and improves compile times, in our case by ~1.5%.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D148023
1 parent b86b468 commit 5368c10

File tree

3 files changed

+126
-4
lines changed

3 files changed

+126
-4
lines changed

llvm/lib/Target/X86/X86FastISel.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3030,6 +3030,58 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
30303030
updateValueMap(II, ResultReg);
30313031
return true;
30323032
}
3033+
case Intrinsic::x86_sse42_crc32_32_8:
3034+
case Intrinsic::x86_sse42_crc32_32_16:
3035+
case Intrinsic::x86_sse42_crc32_32_32:
3036+
case Intrinsic::x86_sse42_crc32_64_64: {
3037+
if (!Subtarget->hasCRC32())
3038+
return false;
3039+
3040+
Type *RetTy = II->getCalledFunction()->getReturnType();
3041+
3042+
MVT VT;
3043+
if (!isTypeLegal(RetTy, VT))
3044+
return false;
3045+
3046+
unsigned Opc;
3047+
const TargetRegisterClass *RC = nullptr;
3048+
3049+
switch (II->getIntrinsicID()) {
3050+
default:
3051+
llvm_unreachable("Unexpected intrinsic.");
3052+
case Intrinsic::x86_sse42_crc32_32_8:
3053+
Opc = X86::CRC32r32r8;
3054+
RC = &X86::GR32RegClass;
3055+
break;
3056+
case Intrinsic::x86_sse42_crc32_32_16:
3057+
Opc = X86::CRC32r32r16;
3058+
RC = &X86::GR32RegClass;
3059+
break;
3060+
case Intrinsic::x86_sse42_crc32_32_32:
3061+
Opc = X86::CRC32r32r32;
3062+
RC = &X86::GR32RegClass;
3063+
break;
3064+
case Intrinsic::x86_sse42_crc32_64_64:
3065+
Opc = X86::CRC32r64r64;
3066+
RC = &X86::GR64RegClass;
3067+
break;
3068+
}
3069+
3070+
const Value *LHS = II->getArgOperand(0);
3071+
const Value *RHS = II->getArgOperand(1);
3072+
3073+
Register LHSReg = getRegForValue(LHS);
3074+
Register RHSReg = getRegForValue(RHS);
3075+
if (!LHSReg || !RHSReg)
3076+
return false;
3077+
3078+
Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
3079+
if (!ResultReg)
3080+
return false;
3081+
3082+
updateValueMap(II, ResultReg);
3083+
return true;
3084+
}
30333085
}
30343086
}
30353087

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no-generate-body-for-unused-prefixes
2+
; i686 can only use -fast-isel-abort=1 because argument lowering is not supported there, so also check the missed-selection remarks to verify that FastISel didn't miss the call.
3+
; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
4+
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
5+
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
6+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
7+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
8+
9+
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
10+
11+
; STDERR-X86-NOT: FastISel missed call: %res = call i32 @llvm.x86.sse42.crc32
12+
13+
; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
14+
define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
15+
; X86-LABEL: test_mm_crc32_u8:
16+
; X86: # %bb.0:
17+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
18+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
19+
; X86-NEXT: crc32b %cl, %eax
20+
; X86-NEXT: retl
21+
;
22+
; X64-LABEL: test_mm_crc32_u8:
23+
; X64: # %bb.0:
24+
; X64-NEXT: movl %edi, %eax
25+
; X64-NEXT: crc32b %sil, %eax
26+
; X64-NEXT: retq
27+
%trunc = trunc i32 %a1 to i8
28+
%res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
29+
ret i32 %res
30+
}
31+
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
32+
33+
; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
34+
define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
35+
; X86-LABEL: test_mm_crc32_u16:
36+
; X86: # %bb.0:
37+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
38+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
39+
; X86-NEXT: crc32w %cx, %eax
40+
; X86-NEXT: retl
41+
;
42+
; X64-LABEL: test_mm_crc32_u16:
43+
; X64: # %bb.0:
44+
; X64-NEXT: movl %edi, %eax
45+
; X64-NEXT: crc32w %si, %eax
46+
; X64-NEXT: retq
47+
%trunc = trunc i32 %a1 to i16
48+
%res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
49+
ret i32 %res
50+
}
51+
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
52+
53+
define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
54+
; X86-LABEL: test_mm_crc32_u32:
55+
; X86: # %bb.0:
56+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
57+
; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax
58+
; X86-NEXT: retl
59+
;
60+
; X64-LABEL: test_mm_crc32_u32:
61+
; X64: # %bb.0:
62+
; X64-NEXT: movl %edi, %eax
63+
; X64-NEXT: crc32l %esi, %eax
64+
; X64-NEXT: retq
65+
%res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
66+
ret i32 %res
67+
}
68+
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone

llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
3-
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
2+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
3+
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
44

55
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
66

7-
define i64 @test_mm_crc64_u8(i64 %a0, i8 %a1) nounwind{
7+
; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
8+
define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
89
; CHECK-LABEL: test_mm_crc64_u8:
910
; CHECK: # %bb.0:
1011
; CHECK-NEXT: crc32b %sil, %edi
1112
; CHECK-NEXT: movl %edi, %eax
1213
; CHECK-NEXT: retq
13-
%res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
14+
%trunc = trunc i32 %a1 to i8
15+
%res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
1416
ret i64 %res
1517
}
1618
declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone

0 commit comments

Comments
 (0)