Skip to content

Commit dc32e91

Browse files
committed
[X86] Teach X86SelectionDAGInfo::EmitTargetCodeForMemcpy about GNUX32
Summary: In GNUX32, is64BitMode returns true, but pointers are 32-bits. So we shouldn't copy pointer values into RSI/RDI since the widths don't match. Fixes PR38865 despite what the title says. I think the llvm_unreachable in the copyPhysReg code tricked the optimizer and made the fatal error trigger. Reviewers: rnk, efriedma, MatzeB, echristo Reviewed By: efriedma Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D51893 llvm-svn: 342015
1 parent ee9bb87 commit dc32e91

File tree

3 files changed

+83
-24
lines changed

3 files changed

+83
-24
lines changed

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -394,30 +394,41 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
394394
//
395395
let SchedRW = [WriteMicrocoded] in {
396396
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
397-
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
398-
[(X86rep_movs i8)]>, REP,
399-
Requires<[Not64BitMode]>;
400-
def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
401-
[(X86rep_movs i16)]>, REP, OpSize16,
402-
Requires<[Not64BitMode]>;
403-
def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
404-
[(X86rep_movs i32)]>, REP, OpSize32,
405-
Requires<[Not64BitMode]>;
397+
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins),
398+
"{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}",
399+
[(X86rep_movs i8)]>, REP, AdSize32,
400+
Requires<[NotLP64]>;
401+
def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins),
402+
"{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}",
403+
[(X86rep_movs i16)]>, REP, AdSize32, OpSize16,
404+
Requires<[NotLP64]>;
405+
def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins),
406+
"{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}",
407+
[(X86rep_movs i32)]>, REP, AdSize32, OpSize32,
408+
Requires<[NotLP64]>;
409+
def REP_MOVSQ_32 : RI<0xA5, RawFrm, (outs), (ins),
410+
"{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}",
411+
[(X86rep_movs i64)]>, REP, AdSize32,
412+
Requires<[NotLP64, In64BitMode]>;
406413
}
407414

408415
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
409-
def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
410-
[(X86rep_movs i8)]>, REP,
411-
Requires<[In64BitMode]>;
412-
def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
413-
[(X86rep_movs i16)]>, REP, OpSize16,
414-
Requires<[In64BitMode]>;
415-
def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
416-
[(X86rep_movs i32)]>, REP, OpSize32,
417-
Requires<[In64BitMode]>;
418-
def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
419-
[(X86rep_movs i64)]>, REP,
420-
Requires<[In64BitMode]>;
416+
def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
417+
"{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}",
418+
[(X86rep_movs i8)]>, REP, AdSize64,
419+
Requires<[IsLP64]>;
420+
def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
421+
"{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}",
422+
[(X86rep_movs i16)]>, REP, AdSize64, OpSize16,
423+
Requires<[IsLP64]>;
424+
def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
424+
"{rep;movsl (%rsi), %es:(%rdi)|rep movsd es:[rdi], [rsi]}",
425+
[(X86rep_movs i32)]>, REP, AdSize64, OpSize32,
426+
Requires<[IsLP64]>;
428+
def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
429+
"{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}",
430+
[(X86rep_movs i64)]>, REP, AdSize64,
431+
Requires<[IsLP64]>;
421432
}
422433

423434
// FIXME: Should use "(X86rep_stos AL)" as the pattern.

llvm/lib/Target/X86/X86SelectionDAGInfo.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,14 +255,15 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
255255
}
256256
}
257257

258+
bool Use64BitRegs = Subtarget.isTarget64BitLP64();
258259
SDValue InFlag;
259-
Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
260+
Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
260261
DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
261262
InFlag = Chain.getValue(1);
262-
Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
263+
Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
263264
Dst, InFlag);
264265
InFlag = Chain.getValue(1);
265-
Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
266+
Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
266267
Src, InFlag);
267268
InFlag = Chain.getValue(1);
268269

llvm/test/CodeGen/X86/pr38865.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -show-mc-encoding < %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-unknown-linux-gnux32"
6+
7+
%struct.a = type { [65 x i32] }
8+
9+
@c = global %struct.a zeroinitializer, align 4
10+
11+
define void @e() nounwind {
12+
; CHECK-LABEL: e:
13+
; CHECK: # %bb.0: # %entry
14+
; CHECK-NEXT: pushq %rbx # encoding: [0x53]
15+
; CHECK-NEXT: subl $528, %esp # encoding: [0x81,0xec,0x10,0x02,0x00,0x00]
16+
; CHECK-NEXT: # imm = 0x210
17+
; CHECK-NEXT: leal {{[0-9]+}}(%rsp), %ebx # encoding: [0x8d,0x9c,0x24,0x08,0x01,0x00,0x00]
18+
; CHECK-NEXT: movl $c, %esi # encoding: [0xbe,A,A,A,A]
19+
; CHECK-NEXT: # fixup A - offset: 1, value: c, kind: FK_Data_4
20+
; CHECK-NEXT: movl $260, %edx # encoding: [0xba,0x04,0x01,0x00,0x00]
21+
; CHECK-NEXT: # imm = 0x104
22+
; CHECK-NEXT: movl %ebx, %edi # encoding: [0x89,0xdf]
23+
; CHECK-NEXT: callq memcpy # encoding: [0xe8,A,A,A,A]
24+
; CHECK-NEXT: # fixup A - offset: 1, value: memcpy-4, kind: FK_PCRel_4
25+
; CHECK-NEXT: movl $32, %ecx # encoding: [0xb9,0x20,0x00,0x00,0x00]
26+
; CHECK-NEXT: movl %esp, %edi # encoding: [0x89,0xe7]
27+
; CHECK-NEXT: movl %ebx, %esi # encoding: [0x89,0xde]
28+
; CHECK-NEXT: rep;movsq (%esi), %es:(%edi) # encoding: [0xf3,0x67,0x48,0xa5]
29+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x67,0x8b,0x84,0x24,0x08,0x02,0x00,0x00]
30+
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # encoding: [0x67,0x89,0x84,0x24,0x00,0x01,0x00,0x00]
31+
; CHECK-NEXT: callq d # encoding: [0xe8,A,A,A,A]
32+
; CHECK-NEXT: # fixup A - offset: 1, value: d-4, kind: FK_PCRel_4
33+
; CHECK-NEXT: addl $528, %esp # encoding: [0x81,0xc4,0x10,0x02,0x00,0x00]
34+
; CHECK-NEXT: # imm = 0x210
35+
; CHECK-NEXT: popq %rbx # encoding: [0x5b]
36+
; CHECK-NEXT: retq # encoding: [0xc3]
37+
entry:
38+
%byval-temp = alloca %struct.a, align 8
39+
%0 = bitcast %struct.a* %byval-temp to i8*
40+
call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 8 %0, i8* align 4 bitcast (%struct.a* @c to i8*), i32 260, i1 false)
41+
call void @d(%struct.a* byval nonnull align 8 %byval-temp)
42+
ret void
43+
}
44+
45+
declare void @d(%struct.a* byval align 8) local_unnamed_addr #1
46+
47+
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)

0 commit comments

Comments
 (0)