Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 70ad23e

Browse files
committed
[X86][AVX512] Change VCVTSS2SD and VCVTSD2SS node types to keep consistency between VEX/EVEX versions.
AVX versions of the converts work on f32/f64 types, while AVX512 version work on vectors. Differential Revision: https://reviews.llvm.org/D29988 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295940 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 210095c commit 70ad23e

File tree

4 files changed

+102
-54
lines changed

4 files changed

+102
-54
lines changed

lib/Target/X86/X86InstrAVX512.td

Lines changed: 54 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -259,6 +259,23 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
259259
MaskingConstraint, NoItinerary, IsCommutable,
260260
IsKCommutable>;
261261

262+
// Similar to AVX512_maskable_common, but with scalar types.
263+
multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
264+
dag Outs,
265+
dag Ins, dag MaskingIns, dag ZeroMaskingIns,
266+
string OpcodeStr,
267+
string AttSrcAsm, string IntelSrcAsm,
268+
SDNode Select = vselect,
269+
string MaskingConstraint = "",
270+
InstrItinClass itin = NoItinerary,
271+
bit IsCommutable = 0,
272+
bit IsKCommutable = 0> :
273+
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
274+
AttSrcAsm, IntelSrcAsm,
275+
[], [], [],
276+
MaskingConstraint, NoItinerary, IsCommutable,
277+
IsKCommutable>;
278+
262279
// This multiclass generates the unconditional/non-masking, the masking and
263280
// the zero-masking variant of the vector instruction. In the masking case, the
264281
// preserved vector elements come from a new dummy input operand tied to $dst.
@@ -291,6 +308,18 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
291308
(X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
292309
X86selects, "$src0 = $dst", itin, IsCommutable>;
293310

311+
// Similar to AVX512_maskable_scalar, but with scalar types.
312+
multiclass AVX512_maskable_fp_scalar<bits<8> O, Format F, X86VectorVTInfo _,
313+
dag Outs, dag Ins, string OpcodeStr,
314+
string AttSrcAsm, string IntelSrcAsm,
315+
InstrItinClass itin = NoItinerary,
316+
bit IsCommutable = 0> :
317+
AVX512_maskable_fp_common<O, F, _, Outs, Ins,
318+
!con((ins _.FRC:$src0, _.KRCWM:$mask), Ins),
319+
!con((ins _.KRCWM:$mask), Ins),
320+
OpcodeStr, AttSrcAsm, IntelSrcAsm,
321+
X86selects, "$src0 = $dst", itin, IsCommutable>;
322+
294323
// Similar to AVX512_maskable but in this case one of the source operands
295324
// ($src1) is already tied to $dst so we just use that for the preserved
296325
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
@@ -6030,27 +6059,40 @@ let Predicates = [HasAVX512] in {
60306059
//===----------------------------------------------------------------------===//
60316060
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
60326061
X86VectorVTInfo _Src, SDNode OpNode> {
6033-
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6062+
let isCodeGenOnly = 1 in {
6063+
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
60346064
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
60356065
"$src2, $src1", "$src1, $src2",
60366066
(_.VT (OpNode (_.VT _.RC:$src1),
60376067
(_Src.VT _Src.RC:$src2),
60386068
(i32 FROUND_CURRENT)))>,
60396069
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
6040-
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6041-
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
6070+
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6071+
(ins _.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
60426072
"$src2, $src1", "$src1, $src2",
60436073
(_.VT (OpNode (_.VT _.RC:$src1),
60446074
(_Src.VT (scalar_to_vector
60456075
(_Src.ScalarLdFrag addr:$src2))),
60466076
(i32 FROUND_CURRENT)))>,
60476077
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
6078+
}
6079+
6080+
defm rr : AVX512_maskable_fp_scalar<opc, MRMSrcReg, _, (outs _.FRC:$dst),
6081+
(ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr,
6082+
"$src2, $src1", "$src1, $src2">,
6083+
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
6084+
let mayLoad = 1 in
6085+
defm rm : AVX512_maskable_fp_scalar<opc, MRMSrcMem, _, (outs _.FRC:$dst),
6086+
(ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
6087+
"$src2, $src1", "$src1, $src2">,
6088+
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
6089+
60486090
}
60496091

60506092
// Scalar Conversion with SAE - suppress all exceptions
60516093
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
60526094
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6053-
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6095+
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
60546096
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
60556097
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
60566098
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6062,7 +6104,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
60626104
// Scalar Conversion with rounding control (RC)
60636105
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
60646106
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6065-
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6107+
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
60666108
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
60676109
"$rc, $src2, $src1", "$src1, $src2, $rc",
60686110
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6095,39 +6137,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
60956137
X86fpextRnd,f32x_info, f64x_info >;
60966138

60976139
def : Pat<(f64 (fpextend FR32X:$src)),
6098-
(COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
6099-
(COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
6140+
(VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
61006141
Requires<[HasAVX512]>;
61016142
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
6102-
(COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
6143+
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
61036144
Requires<[HasAVX512]>;
61046145

61056146
def : Pat<(f64 (extloadf32 addr:$src)),
6106-
(COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
6147+
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
61076148
Requires<[HasAVX512, OptForSize]>;
61086149

61096150
def : Pat<(f64 (extloadf32 addr:$src)),
6110-
(COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
6111-
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
6151+
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
61126152
Requires<[HasAVX512, OptForSpeed]>;
61136153

61146154
def : Pat<(f32 (fpround FR64X:$src)),
6115-
(COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
6116-
(COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
6155+
(VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
61176156
Requires<[HasAVX512]>;
61186157

61196158
def : Pat<(v4f32 (X86Movss
61206159
(v4f32 VR128X:$dst),
61216160
(v4f32 (scalar_to_vector
61226161
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6123-
(VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
6162+
(VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
61246163
Requires<[HasAVX512]>;
61256164

61266165
def : Pat<(v2f64 (X86Movsd
61276166
(v2f64 VR128X:$dst),
61286167
(v2f64 (scalar_to_vector
61296168
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6130-
(VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
6169+
(VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
61316170
Requires<[HasAVX512]>;
61326171

61336172
//===----------------------------------------------------------------------===//

lib/Target/X86/X86InstrInfo.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1851,6 +1851,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
18511851
{ X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
18521852
{ X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
18531853
{ X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
1854+
{ X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
1855+
{ X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
1856+
{ X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
1857+
{ X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
18541858
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
18551859
{ X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
18561860
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
@@ -8165,11 +8169,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
81658169
case X86::VCVTUSI642SDZrrb_Int:
81668170
case X86::VCVTUSI642SDZrm_Int:
81678171
case X86::VCVTSD2SSZrr:
8168-
case X86::VCVTSD2SSZrrb:
8172+
case X86::VCVTSD2SSZrr_Int:
8173+
case X86::VCVTSD2SSZrrb_Int:
81698174
case X86::VCVTSD2SSZrm:
8175+
case X86::VCVTSD2SSZrm_Int:
81708176
case X86::VCVTSS2SDZrr:
8171-
case X86::VCVTSS2SDZrrb:
8177+
case X86::VCVTSS2SDZrr_Int:
8178+
case X86::VCVTSS2SDZrrb_Int:
81728179
case X86::VCVTSS2SDZrm:
8180+
case X86::VCVTSS2SDZrm_Int:
81738181
case X86::VRNDSCALESDr:
81748182
case X86::VRNDSCALESDrb:
81758183
case X86::VRNDSCALESDm:

lib/Target/X86/X86InstrSSE.td

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1716,20 +1716,21 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
17161716
// Convert scalar double to scalar single
17171717
let hasSideEffects = 0, Predicates = [UseAVX] in {
17181718
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
1719-
(ins FR64:$src1, FR64:$src2),
1719+
(ins FR32:$src1, FR64:$src2),
17201720
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
17211721
IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
17221722
Sched<[WriteCvtF2F]>, VEX_WIG;
17231723
let mayLoad = 1 in
17241724
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
1725-
(ins FR64:$src1, f64mem:$src2),
1725+
(ins FR32:$src1, f64mem:$src2),
17261726
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
17271727
[], IIC_SSE_CVT_Scalar_RM>,
17281728
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
17291729
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
17301730
}
17311731

1732-
def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
1732+
def : Pat<(f32 (fpround FR64:$src)),
1733+
(VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
17331734
Requires<[UseAVX]>;
17341735

17351736
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1781,30 +1782,30 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
17811782
// SSE2 instructions with XS prefix
17821783
let hasSideEffects = 0, Predicates = [UseAVX] in {
17831784
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
1784-
(ins FR32:$src1, FR32:$src2),
1785+
(ins FR64:$src1, FR32:$src2),
17851786
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
17861787
[], IIC_SSE_CVT_Scalar_RR>,
17871788
XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
17881789
Sched<[WriteCvtF2F]>, VEX_WIG;
17891790
let mayLoad = 1 in
17901791
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
1791-
(ins FR32:$src1, f32mem:$src2),
1792+
(ins FR64:$src1, f32mem:$src2),
17921793
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
17931794
[], IIC_SSE_CVT_Scalar_RM>,
17941795
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
17951796
Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
17961797
}
17971798

17981799
def : Pat<(f64 (fpextend FR32:$src)),
1799-
(VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
1800+
(VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
18001801
def : Pat<(fpextend (loadf32 addr:$src)),
1801-
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
1802+
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
18021803

18031804
def : Pat<(extloadf32 addr:$src),
1804-
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
1805+
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
18051806
Requires<[UseAVX, OptForSize]>;
18061807
def : Pat<(extloadf32 addr:$src),
1807-
(VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
1808+
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
18081809
Requires<[UseAVX, OptForSpeed]>;
18091810

18101811
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),

test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
66

77
;
88
; Half to Float
@@ -1941,25 +1941,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
19411941
; AVX1-LABEL: cvt_8i16_to_8f64:
19421942
; AVX1: # BB#0:
19431943
; AVX1-NEXT: vmovq %xmm0, %rdx
1944-
; AVX1-NEXT: movq %rdx, %r9
1944+
; AVX1-NEXT: movq %rdx, %r8
19451945
; AVX1-NEXT: movl %edx, %r10d
1946-
; AVX1-NEXT: movswl %dx, %r8d
1946+
; AVX1-NEXT: movswl %dx, %r9d
19471947
; AVX1-NEXT: shrq $48, %rdx
1948-
; AVX1-NEXT: shrq $32, %r9
1948+
; AVX1-NEXT: shrq $32, %r8
19491949
; AVX1-NEXT: shrl $16, %r10d
19501950
; AVX1-NEXT: vpextrq $1, %xmm0, %rdi
1951-
; AVX1-NEXT: movq %rdi, %rsi
1952-
; AVX1-NEXT: movl %edi, %eax
1951+
; AVX1-NEXT: movq %rdi, %rax
1952+
; AVX1-NEXT: movl %edi, %esi
19531953
; AVX1-NEXT: movswl %di, %ecx
19541954
; AVX1-NEXT: shrq $48, %rdi
1955-
; AVX1-NEXT: shrq $32, %rsi
1956-
; AVX1-NEXT: shrl $16, %eax
1957-
; AVX1-NEXT: cwtl
1958-
; AVX1-NEXT: vmovd %eax, %xmm0
1955+
; AVX1-NEXT: shrq $32, %rax
1956+
; AVX1-NEXT: shrl $16, %esi
1957+
; AVX1-NEXT: movswl %si, %esi
1958+
; AVX1-NEXT: vmovd %esi, %xmm0
19591959
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm1
19601960
; AVX1-NEXT: vmovd %ecx, %xmm0
19611961
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm2
1962-
; AVX1-NEXT: movswl %si, %eax
1962+
; AVX1-NEXT: cwtl
19631963
; AVX1-NEXT: vmovd %eax, %xmm0
19641964
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm3
19651965
; AVX1-NEXT: movswl %di, %eax
@@ -1968,9 +1968,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
19681968
; AVX1-NEXT: movswl %r10w, %eax
19691969
; AVX1-NEXT: vmovd %eax, %xmm0
19701970
; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0
1971-
; AVX1-NEXT: vmovd %r8d, %xmm5
1971+
; AVX1-NEXT: vmovd %r9d, %xmm5
19721972
; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5
1973-
; AVX1-NEXT: movswl %r9w, %eax
1973+
; AVX1-NEXT: movswl %r8w, %eax
19741974
; AVX1-NEXT: vmovd %eax, %xmm6
19751975
; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6
19761976
; AVX1-NEXT: movswl %dx, %eax
@@ -1995,25 +1995,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
19951995
; AVX2-LABEL: cvt_8i16_to_8f64:
19961996
; AVX2: # BB#0:
19971997
; AVX2-NEXT: vmovq %xmm0, %rdx
1998-
; AVX2-NEXT: movq %rdx, %r9
1998+
; AVX2-NEXT: movq %rdx, %r8
19991999
; AVX2-NEXT: movl %edx, %r10d
2000-
; AVX2-NEXT: movswl %dx, %r8d
2000+
; AVX2-NEXT: movswl %dx, %r9d
20012001
; AVX2-NEXT: shrq $48, %rdx
2002-
; AVX2-NEXT: shrq $32, %r9
2002+
; AVX2-NEXT: shrq $32, %r8
20032003
; AVX2-NEXT: shrl $16, %r10d
20042004
; AVX2-NEXT: vpextrq $1, %xmm0, %rdi
2005-
; AVX2-NEXT: movq %rdi, %rsi
2006-
; AVX2-NEXT: movl %edi, %eax
2005+
; AVX2-NEXT: movq %rdi, %rax
2006+
; AVX2-NEXT: movl %edi, %esi
20072007
; AVX2-NEXT: movswl %di, %ecx
20082008
; AVX2-NEXT: shrq $48, %rdi
2009-
; AVX2-NEXT: shrq $32, %rsi
2010-
; AVX2-NEXT: shrl $16, %eax
2011-
; AVX2-NEXT: cwtl
2012-
; AVX2-NEXT: vmovd %eax, %xmm0
2009+
; AVX2-NEXT: shrq $32, %rax
2010+
; AVX2-NEXT: shrl $16, %esi
2011+
; AVX2-NEXT: movswl %si, %esi
2012+
; AVX2-NEXT: vmovd %esi, %xmm0
20132013
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1
20142014
; AVX2-NEXT: vmovd %ecx, %xmm0
20152015
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2
2016-
; AVX2-NEXT: movswl %si, %eax
2016+
; AVX2-NEXT: cwtl
20172017
; AVX2-NEXT: vmovd %eax, %xmm0
20182018
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3
20192019
; AVX2-NEXT: movswl %di, %eax
@@ -2022,9 +2022,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
20222022
; AVX2-NEXT: movswl %r10w, %eax
20232023
; AVX2-NEXT: vmovd %eax, %xmm0
20242024
; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0
2025-
; AVX2-NEXT: vmovd %r8d, %xmm5
2025+
; AVX2-NEXT: vmovd %r9d, %xmm5
20262026
; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5
2027-
; AVX2-NEXT: movswl %r9w, %eax
2027+
; AVX2-NEXT: movswl %r8w, %eax
20282028
; AVX2-NEXT: vmovd %eax, %xmm6
20292029
; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6
20302030
; AVX2-NEXT: movswl %dx, %eax

0 commit comments

Comments
 (0)