Skip to content

[X86][EVEX512] Do not allow 512-bit memcpy without EVEX512 #70420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged on Oct 27, 2023
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments; retrying.
Loading…
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLoweringCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
if (Op.size() >= 64 && Subtarget.hasAVX512() &&
if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
(Subtarget.getPreferVectorWidth() >= 512)) {
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
}
Expand Down Expand Up @@ -395,7 +395,7 @@ bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
return true;
return false;
case 512:
if (Subtarget.hasAVX512())
if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
return true;
return false;
default:
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
case TargetTransformInfo::RGK_FixedWidthVector:
if (ST->hasAVX512() && PreferVectorWidth >= 512)
if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
return TypeSize::getFixed(512);
if (ST->hasAVX() && PreferVectorWidth >= 256)
return TypeSize::getFixed(256);
Expand Down Expand Up @@ -6131,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
// Only enable vector loads for equality comparison. Right now the vector
// version is not as fast for three way compare (see #33329).
const unsigned PreferredWidth = ST->getPreferVectorWidth();
if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
Options.LoadSizes.push_back(64);
if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
}
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/X86/evex512-mem.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256

; Verify that a 72-byte memcpy is lowered with 512-bit (zmm) vector moves
; only when EVEX512 is available: the AVX512 run uses one vmovups of a zmm
; register plus a scalar movq tail, while the -evex512 run must fall back
; to two 256-bit (ymm) vmovups operations for the 64-byte vector portion.
define void @test1() {
; AVX512-LABEL: test1:
; AVX512: # %bb.0:
; AVX512-NEXT: movq 64, %rax
; AVX512-NEXT: movq %rax, (%rax)
; AVX512-NEXT: vmovups 0, %zmm0
; AVX512-NEXT: vmovups %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; AVX256-LABEL: test1:
; AVX256: # %bb.0:
; AVX256-NEXT: movq 64, %rax
; AVX256-NEXT: movq %rax, (%rax)
; AVX256-NEXT: vmovups 0, %ymm0
; AVX256-NEXT: vmovups 32, %ymm1
; AVX256-NEXT: vmovups %ymm1, (%rax)
; AVX256-NEXT: vmovups %ymm0, (%rax)
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
  ; 72 bytes = one 64-byte vector chunk + an 8-byte scalar tail; the
  ; poison/null pointers are irrelevant since only codegen is checked.
call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false)
ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)