Skip to content

[X86]Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID/CET instructions #76786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 29 additions & 38 deletions llvm/lib/Target/X86/X86DomainReassignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,40 +619,30 @@ void X86DomainReassignment::initConverters() {
std::make_unique<InstrReplacerDstCOPY>(From, To);
};

bool HasEGPR = STI->hasEGPR();
createReplacerDstCOPY(X86::MOVZX32rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacerDstCOPY(X86::MOVZX64rm16,
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
// Yields the `_EVEX`-suffixed opcode when the subtarget reports EGPR support
// and the plain opcode otherwise. The whole expansion is parenthesized so the
// conditional cannot bind to neighbouring operators at the use site.
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));

createReplacerDstCOPY(X86::MOVZX32rr16,
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX64rr16,
HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));

if (STI->hasDQI()) {
createReplacerDstCOPY(X86::MOVZX16rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX32rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacerDstCOPY(X86::MOVZX64rm8,
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);

createReplacerDstCOPY(X86::MOVZX16rr8,
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX32rr8,
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX64rr8,
HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));

createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
}

auto createReplacer = [&](unsigned From, unsigned To) {
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
};

createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk));
createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
createReplacer(X86::NOT16r, X86::KNOTWrr);
Expand All @@ -661,14 +651,14 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR16rr, X86::KXORWrr);

if (STI->hasBWI()) {
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm));
createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm));

createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk));
createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk));

createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk));
createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk));

createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
Expand Down Expand Up @@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() {

// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
//createReplacer(X86::TEST32rr, X86::KTESTDrr);
//createReplacer(X86::TEST64rr, X86::KTESTQrr);
// createReplacer(X86::TEST32rr, X86::KTESTDrr);
// createReplacer(X86::TEST64rr, X86::KTESTQrr);
}

if (STI->hasDQI()) {
Expand All @@ -706,9 +696,9 @@ void X86DomainReassignment::initConverters() {

createReplacer(X86::AND8rr, X86::KANDBrr);

createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk));
createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk));

createReplacer(X86::NOT8r, X86::KNOTBrr);

Expand All @@ -719,11 +709,12 @@ void X86DomainReassignment::initConverters() {

// TODO: KTEST is not a replacement for TEST due to flag differences. Need
// to prove only Z flag is used.
//createReplacer(X86::TEST8rr, X86::KTESTBrr);
//createReplacer(X86::TEST16rr, X86::KTESTWrr);
// createReplacer(X86::TEST8rr, X86::KTESTBrr);
// createReplacer(X86::TEST16rr, X86::KTESTWrr);

createReplacer(X86::XOR8rr, X86::KXORBrr);
}
#undef GET_EGPR_IF_ENABLED
}

bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/X86/X86FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default:
llvm_unreachable("Unexpected intrinsic.");
// Yields the `_EVEX`-suffixed opcode when the subtarget reports EGPR support
// and the plain opcode otherwise. The whole expansion is parenthesized so the
// conditional cannot bind to neighbouring operators at the use site.
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
case Intrinsic::x86_sse42_crc32_32_8:
Opc = X86::CRC32r32r8;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_16:
Opc = X86::CRC32r32r16;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_32:
Opc = X86::CRC32r32r32;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_64_64:
Opc = X86::CRC32r64r64;
Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
RC = &X86::GR64RegClass;
break;
#undef GET_EGPR_IF_ENABLED
}

const Value *LHS = II->getArgOperand(0);
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/X86/X86InstrSystem.td
Original file line number Diff line number Diff line change
Expand Up @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
Requires<[In64BitMode, HasINVPCID]>;
Requires<[In64BitMode]>;

def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>,
EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
} // SchedRW

let Predicates = [In64BitMode, HasINVPCID] in {
let Predicates = [HasINVPCID, NoEGPR] in {
// The instruction can only use a 64 bit register as the register argument
// in 64 bit mode, while the intrinsic only accepts a 32 bit argument
// corresponding to it.
Expand All @@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
addr:$src2)>;
}

// When EGPR is available, select INVPCID64_EVEX for the invpcid intrinsic.
// The intrinsic supplies a 32-bit register argument while the instruction
// takes a GR64 operand, so the value is copied with MOV32rr and wrapped into
// a 64-bit register via SUBREG_TO_REG.
let Predicates = [HasINVPCID, HasEGPR] in {
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
(INVPCID64_EVEX
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
addr:$src2)>;
}


//===----------------------------------------------------------------------===//
// SMAP Instruction
Expand Down
41 changes: 30 additions & 11 deletions llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

Expand All @@ -21,9 +22,15 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_mm_crc32_u8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32b %sil, %eax
; X64-NEXT: retq
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; X64-NEXT: crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; X64-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc32_u8:
; EGPR: # %bb.0:
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%trunc = trunc i32 %a1 to i8
%res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
ret i32 %res
Expand All @@ -41,9 +48,15 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_mm_crc32_u16:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32w %si, %eax
; X64-NEXT: retq
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; X64-NEXT: crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc32_u16:
; EGPR: # %bb.0:
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%trunc = trunc i32 %a1 to i16
%res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
ret i32 %res
Expand All @@ -59,9 +72,15 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
;
; X64-LABEL: test_mm_crc32_u32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: crc32l %esi, %eax
; X64-NEXT: retq
; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; X64-NEXT: crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc32_u32:
; EGPR: # %bb.0:
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
ret i32 %res
}
Expand Down
29 changes: 21 additions & 8 deletions llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s
; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c

; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
; CHECK-LABEL: test_mm_crc64_u8:
; CHECK: # %bb.0:
; CHECK-NEXT: crc32b %sil, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: crc32b %sil, %edi # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc64_u8:
; EGPR: # %bb.0:
; EGPR-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
; EGPR-NEXT: retq # encoding: [0xc3]
%trunc = trunc i32 %a1 to i8
%res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
ret i64 %res
Expand All @@ -20,9 +27,15 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
; CHECK-LABEL: test_mm_crc64_u64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: crc32q %rsi, %rax
; CHECK-NEXT: retq
; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: test_mm_crc64_u64:
; EGPR: # %bb.0:
; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
ret i64 %res
}
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR

define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; X86-LABEL: crc32_32_8:
Expand All @@ -14,6 +15,12 @@ define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_32_8:
; EGPR: ## %bb.0:
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
Expand All @@ -31,6 +38,12 @@ define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_32_16:
; EGPR: ## %bb.0:
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; EGPR-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
ret i32 %tmp
}
Expand All @@ -48,6 +61,12 @@ define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
; X64-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_32_32:
; EGPR: ## %bb.0:
; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
; EGPR-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
ret i32 %tmp
}
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR

declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
Expand All @@ -10,6 +11,12 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_64_8:
; EGPR: ## %bb.0:
; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
ret i64 %tmp
}
Expand All @@ -20,6 +27,12 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
; CHECK-NEXT: retq ## encoding: [0xc3]
;
; EGPR-LABEL: crc32_64_64:
; EGPR: ## %bb.0:
; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
; EGPR-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
; EGPR-NEXT: retq ## encoding: [0xc3]
%tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
ret i64 %tmp
}
Loading