Skip to content

Commit 58d4fe2

Browse files
authored
[X86][EVEX512] Do not allow 512-bit memcpy without EVEX512 (#70420)
Fixes the crash reported in #65920.
1 parent 7046202 commit 58d4fe2

File tree

3 files changed: 34 additions and 4 deletions.

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
281281
if (Op.size() >= 16 &&
282282
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
283283
// FIXME: Check if unaligned 64-byte accesses are slow.
284-
if (Op.size() >= 64 && Subtarget.hasAVX512() &&
284+
if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
285285
(Subtarget.getPreferVectorWidth() >= 512)) {
286286
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
287287
}
@@ -395,7 +395,7 @@ bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
395395
return true;
396396
return false;
397397
case 512:
398-
if (Subtarget.hasAVX512())
398+
if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
399399
return true;
400400
return false;
401401
default:

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
180180
case TargetTransformInfo::RGK_Scalar:
181181
return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
182182
case TargetTransformInfo::RGK_FixedWidthVector:
183-
if (ST->hasAVX512() && PreferVectorWidth >= 512)
183+
if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
184184
return TypeSize::getFixed(512);
185185
if (ST->hasAVX() && PreferVectorWidth >= 256)
186186
return TypeSize::getFixed(256);
@@ -6131,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
61316131
// Only enable vector loads for equality comparison. Right now the vector
61326132
// version is not as fast for three way compare (see #33329).
61336133
const unsigned PreferredWidth = ST->getPreferVectorWidth();
6134-
if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
6134+
if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
6135+
Options.LoadSizes.push_back(64);
61356136
if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
61366137
if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
61376138
}

llvm/test/CodeGen/X86/evex512-mem.ll

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512
3+
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256
4+
5+
define void @test1() {
6+
; AVX512-LABEL: test1:
7+
; AVX512: # %bb.0:
8+
; AVX512-NEXT: movq 64, %rax
9+
; AVX512-NEXT: movq %rax, (%rax)
10+
; AVX512-NEXT: vmovups 0, %zmm0
11+
; AVX512-NEXT: vmovups %zmm0, (%rax)
12+
; AVX512-NEXT: vzeroupper
13+
; AVX512-NEXT: retq
14+
;
15+
; AVX256-LABEL: test1:
16+
; AVX256: # %bb.0:
17+
; AVX256-NEXT: movq 64, %rax
18+
; AVX256-NEXT: movq %rax, (%rax)
19+
; AVX256-NEXT: vmovups 0, %ymm0
20+
; AVX256-NEXT: vmovups 32, %ymm1
21+
; AVX256-NEXT: vmovups %ymm1, (%rax)
22+
; AVX256-NEXT: vmovups %ymm0, (%rax)
23+
; AVX256-NEXT: vzeroupper
24+
; AVX256-NEXT: retq
25+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false)
26+
ret void
27+
}
28+
29+
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

0 commit comments

Comments
 (0)