[X86][GlobalISel] Enable G_BUILD_VECTOR and G_CONSTANT_POOL #92844

Merged: 3 commits, May 29, 2024
29 changes: 25 additions & 4 deletions llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -548,7 +548,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
   unsigned Opc = I.getOpcode();
 
   assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
-         "unexpected instruction");
+         "Only G_STORE and G_LOAD are expected for selection");
 
   const Register DefReg = I.getOperand(0).getReg();
   LLT Ty = MRI.getType(DefReg);
@@ -576,11 +576,32 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
   if (NewOpc == Opc)
     return false;
 
-  X86AddressMode AM;
-  X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM);
-
   I.setDesc(TII.get(NewOpc));
   MachineInstrBuilder MIB(MF, I);
+  const MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());
+
+  if (Ptr->getOpcode() == TargetOpcode::G_CONSTANT_POOL) {
+    assert(Opc == TargetOpcode::G_LOAD &&
+           "Only G_LOAD from constant pool is expected");
+    // TODO: Need a separate move for Large model
+    if (TM.getCodeModel() == CodeModel::Large)
+      return false;
+
+    unsigned char OpFlag = STI.classifyLocalReference(nullptr);
+    unsigned PICBase = 0;
+    if (OpFlag == X86II::MO_GOTOFF)
+      PICBase = TII.getGlobalBaseReg(&MF);
+    else if (STI.is64Bit())
+      PICBase = X86::RIP;
+
+    I.removeOperand(1);
+    addConstantPoolReference(MIB, Ptr->getOperand(1).getIndex(), PICBase,
+                             OpFlag);
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  }
+
+  X86AddressMode AM;
+  X86SelectAddress(*Ptr, MRI, AM);
   if (Opc == TargetOpcode::G_LOAD) {
     I.removeOperand(1);
     addFullAddress(MIB, AM);
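For orientation, a minimal IR sketch of the path this hunk adds. The function name and the generic MIR shown in the comments are illustrative assumptions, not taken from the PR:

; Hedged sketch of the selection path above. Assuming the legalizer has
; lowered the vector constant to roughly
;   %cp:gpr(p0) = G_CONSTANT_POOL %const.0
;   %v:vecr(<4 x s32>) = G_LOAD %cp :: (load (<4 x s32>) from constant-pool)
; the selector folds the G_CONSTANT_POOL into the load's address:
; RIP-relative on 64-bit targets (except the large code model, which bails
; out), or via the global base register when classifyLocalReference
; returns MO_GOTOFF.
; Try: llc -mtriple=x86_64-linux-gnu -mattr=+sse2 -global-isel example.ll
define <4 x i32> @constant_pool_load() {
  ret <4 x i32> <i32 1, i32 2, i32 3, i32 4>
}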
104 changes: 98 additions & 6 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -13,7 +13,10 @@
#include "X86LegalizerInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -71,6 +74,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
   const LLT v16s32 = LLT::fixed_vector(16, 32);
   const LLT v8s64 = LLT::fixed_vector(8, 64);
 
+  const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8;
+  const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16;
+  const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32;
+  const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64;
+
   // todo: AVX512 bool vector predicate types
 
   // implicit/constants

@@ -338,6 +346,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,

   getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}});
 
+  getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0});
+
   getActionDefinitionsBuilder(G_PTR_ADD)
       .legalIf([=](const LegalityQuery &Query) -> bool {
         return typePairInSet(0, 1, {{p0, s32}})(Query) ||

@@ -368,9 +378,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                                        {s64, p0, s64, 1},
                                        {v2s32, p0, v2s32, 1}});
+  if (HasSSE1)
+    Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}});
   if (HasSSE2)
     Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1},
                                      {v8s16, p0, v8s16, 1},
-                                     {v4s32, p0, v4s32, 1},
                                      {v2s64, p0, v2s64, 1},
                                      {v2p0, p0, v2p0, 1}});
   if (HasAVX)

@@ -384,7 +395,9 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                                        {v32s16, p0, v32s16, 1},
                                        {v16s32, p0, v16s32, 1},
                                        {v8s64, p0, v8s64, 1}});
-  Action.widenScalarToNextPow2(0, /*Min=*/8).clampScalar(0, s8, sMaxScalar);
+  Action.widenScalarToNextPow2(0, /*Min=*/8)
+      .clampScalar(0, s8, sMaxScalar)
+      .scalarize(0);
}
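A note on the new scalarize(0) fallback: presumably it catches vector loads and stores whose type never matches one of the legal memdesc entries above, splitting them into scalar accesses instead of failing to legalize. A hypothetical sketch:

; Assumed effect of the scalarize(0) fallback (hypothetical example, not
; from this PR's tests): <2 x i8> has no legal load mapping above, so the
; access would be broken into scalar i8 loads rather than aborting
; legalization.
define <2 x i8> @load_v2i8(ptr %p) {
  %v = load <2 x i8>, ptr %p
  ret <2 x i8> %v
}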

   for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {

@@ -406,10 +419,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
                (Query.Opcode == G_ANYEXT && Query.Types[0] == s128) ||
                (Is64Bit && Query.Types[0] == s64);
       })
-      .widenScalarToNextPow2(0, /*Min=*/8)
-      .clampScalar(0, s8, sMaxScalar)
-      .widenScalarToNextPow2(1, /*Min=*/8)
-      .clampScalar(1, s8, sMaxScalar);
+      .widenScalarToNextPow2(0, /*Min=*/8)
+      .clampScalar(0, s8, sMaxScalar)
+      .widenScalarToNextPow2(1, /*Min=*/8)
+      .clampScalar(1, s8, sMaxScalar)
+      .scalarize(0);
 
   getActionDefinitionsBuilder(G_SEXT_INREG).lower();
 
@@ -484,6 +498,19 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .widenScalarToNextPow2(1);
 
   // vector ops
+  getActionDefinitionsBuilder(G_BUILD_VECTOR)
+      .customIf([=](const LegalityQuery &Query) {
+        return (HasSSE1 && typeInSet(0, {v4s32})(Query)) ||
+               (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) ||
+               (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) ||
+               (HasAVX512 && typeInSet(0, {v8s64, v16s32, v32s16, v64s8})(Query));
+      })
+      .clampNumElements(0, v16s8, s8MaxVector)
+      .clampNumElements(0, v8s16, s16MaxVector)
+      .clampNumElements(0, v4s32, s32MaxVector)
+      .clampNumElements(0, v2s64, s64MaxVector)
+      .moreElementsToNextPow2(0);

Review thread on the new G_BUILD_VECTOR rule:

Contributor: AArch64 legalizes G_BUILD_VECTOR and then tries all kinds of tricks in the selector:

    getActionDefinitionsBuilder(G_BUILD_VECTOR)

Contributor Author: Yes. To be honest, this is unclear. Lowering G_BUILD_VECTOR earlier would allow combining loads from the constant pool; instruction selection is too late for that. However, there may be scenarios where we want to keep G_BUILD_VECTOR so it can be combined into something more optimal. I think we should revisit this once we have a more complete picture of how we want to optimize the various shuffles and insertions. The current approach lets G_BUILD_VECTOR reuse the C++ selection code for G_LOAD instead of needing its own.

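The clamping and power-of-two rules connect directly to the PR's tests further down. A small sketch of the assumed pipeline, with a hypothetical function name:

; Sketch (assumptions noted): mirroring test_vector_v5f32 below, a
; G_BUILD_VECTOR of <5 x float> is first padded by moreElementsToNextPow2
; to <8 x float> with G_IMPLICIT_DEF lanes; the custom action then emits a
; constant-pool load whose undef lanes show up as "u" in the checked asm.
; Similarly, <64 x i8> with only AVX exceeds s8MaxVector (v32s8), so it is
; clamped and split into two halves (the two ymm loads in test_vector_v64i8).
define <5 x float> @pad_to_pow2() {
  ret <5 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0>
}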
   getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
       .legalIf([=](const LegalityQuery &Query) {
         unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;

@@ -552,6 +579,71 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
   verify(*STI.getInstrInfo());
 }
 
+bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+                                      LostDebugLocObserver &LocObserver) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  switch (MI.getOpcode()) {
+  default:
+    // No idea what to do.
+    return false;
+  case TargetOpcode::G_BUILD_VECTOR:
+    return legalizeBuildVector(MI, MRI, Helper);
+  }
+  llvm_unreachable("expected switch to return");
+}
+
+bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI,
+                                           MachineRegisterInfo &MRI,
+                                           LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  const auto &BuildVector = cast<GBuildVector>(MI);
+  Register Dst = BuildVector.getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  MachineFunction &MF = MIRBuilder.getMF();
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  uint64_t DstTySize = DstTy.getScalarSizeInBits();
+
+  SmallVector<Constant *, 4> CstIdxs;
+  for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) {
+    Register Source = BuildVector.getSourceReg(i);
+
+    auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI);
+    if (ValueAndReg) {
+      CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value));
+      continue;
+    }
+
+    auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI);
+    if (FPValueAndReg) {
+      CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value));
+      continue;
+    }
+
+    if (getOpcodeDef<GImplicitDef>(Source, MRI)) {
+      CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize)));
+      continue;
+    }
+    return false;
+  }
+
+  Constant *ConstVal = ConstantVector::get(CstIdxs);
+
+  const DataLayout &DL = MIRBuilder.getDataLayout();
+  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+  Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
+  auto Addr = MIRBuilder.buildConstantPool(
+      LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)),
+      MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+                              MachineMemOperand::MOLoad, DstTy, Alignment);
+
+  MIRBuilder.buildLoad(Dst, Addr, *MMO);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
   return true;
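A hedged sketch of the boundary of this custom legalization, with an assumed (hypothetical) function name:

; legalizeBuildVector above only fires usefully when every source is a
; G_CONSTANT or G_FCONSTANT (possibly behind copies) or a G_IMPLICIT_DEF.
; With a runtime lane, as below, it returns false, which presumably ends
; legalization unsuccessfully (DAG fallback, or an error under
; -global-isel-abort=1).
define <4 x i32> @not_all_constant(i32 %x) {
  %v = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 poison>, i32 %x, i32 3
  ret <4 x i32> %v
}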
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -30,8 +30,15 @@ class X86LegalizerInfo : public LegalizerInfo {
 public:
   X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
 
+  bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+                      LostDebugLocObserver &LocObserver) const override;
+
   bool legalizeIntrinsic(LegalizerHelper &Helper,
                          MachineInstr &MI) const override;
+
+private:
+  bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -505,6 +505,9 @@ bool X86PassConfig::addRegBankSelect() {

 bool X86PassConfig::addGlobalInstructionSelect() {
   addPass(new InstructionSelect(getOptLevel()));
+  // Add GlobalBaseReg in case there are no SelectionDAG passes afterwards
+  if (isGlobalISelAbortEnabled())
Review thread on lines +508 to +509:

Contributor: This shouldn't need to consider which selector is enabled; the passes have to check whether selection failed or not.

Contributor Author (@e-kud, May 23, 2024): The problem here is that there is a mode in which only GlobalISel runs, without SelectionDAG, and this pass is added by the SelectionDAG setup. So when we don't have SelectionDAG, we need to add the pass for GlobalISel ourselves:

bool X86PassConfig::addInstSelector() {
  // Install an instruction selector.
  addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
  // For ELF, cleanup any local-dynamic TLS accesses.
  if (TM->getTargetTriple().isOSBinFormatELF() &&
      getOptLevel() != CodeGenOptLevel::None)
    addPass(createCleanupLocalDynamicTLSPass());
  addPass(createX86GlobalBaseRegPass());
  addPass(createX86ArgumentStackSlotPass());
  return false;
}

I don't know which is better. I see three options:

  1. Add the same pass twice, though one is enough.
  2. Add it only once, depending on which selector runs last (this is what is implemented).
  3. Extract the pass from the selectors into the general pipeline (I haven't tried this), but it looks like this pass fixes up instruction selection and should stay close to it.

Contributor: I read this backwards, but I guess this is just mirroring how the fallback pipeline is added. It might make sense to have a separate post-isel insert point.

Contributor Author: Yes, indeed, it mirrors this check:

if (!isGlobalISelAbortEnabled())
  if (auto Err = derived().addInstSelector(addPass))
    return std::move(Err);

I think it would be better to create a new insert point in a separate PR?

Contributor: I would have to think about whether we should have the hook. Also, that's the new PM code, which I'm not sure even works yet; we should be looking at TargetPassConfig.

+    addPass(createX86GlobalBaseRegPass());
   return false;
 }

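For context on why the inserted pass matters on this path, a hedged example; the triple and flags are illustrative assumptions, not from the PR:

; On i686 PIC, classifyLocalReference presumably returns MO_GOTOFF, so the
; selector above calls TII.getGlobalBaseReg; the X86GlobalBaseReg pass added
; here is what materializes that register in the entry block, and with a
; GlobalISel-only pipeline nothing else would add the pass.
;   llc -mtriple=i686-linux-gnu -relocation-model=pic -mattr=+sse2 \
;       -global-isel -global-isel-abort=1 example.ll
define <4 x i32> @needs_global_base_reg() {
  ret <4 x i32> <i32 9, i32 8, i32 7, i32 6>
}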
112 changes: 112 additions & 0 deletions llvm/test/CodeGen/X86/isel-buildvector-avx.ll
@@ -0,0 +1,112 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx %s -o - | FileCheck %s --check-prefixes AVX-ALL,AVX
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -fast-isel %s -o - | FileCheck %s --check-prefixes AVX-ALL,AVX
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -global-isel-abort=1 %s -o - | FileCheck %s --check-prefixes AVX-ALL,AVX
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f %s -o - | FileCheck %s --check-prefixes AVX-ALL,AVX512
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -fast-isel %s -o - | FileCheck %s --check-prefixes AVX-ALL,AVX512
; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -global-isel-abort=1 %s -o - | FileCheck %s --check-prefixes AVX-ALL,AVX512

;
; 256 bit vectors
;

define <32 x i8> @test_vector_v32i8() {
; AVX-ALL-LABEL: test_vector_v32i8:
; AVX-ALL: # %bb.0:
; AVX-ALL-NEXT: vmovaps {{.*#+}} ymm0 = [99,2,7,77,56,5,48,73,36,63,68,13,59,34,36,117,43,11,61,97,104,113,46,89,42,12,97,41,73,7,55,73]
; AVX-ALL-NEXT: retq
ret <32 x i8> <i8 99, i8 2, i8 7, i8 77, i8 56, i8 5, i8 48, i8 73, i8 36, i8 63, i8 68, i8 13, i8 59, i8 34, i8 36, i8 117, i8 43, i8 11, i8 61, i8 97, i8 104, i8 113, i8 46, i8 89, i8 42, i8 12, i8 97, i8 41, i8 73, i8 7, i8 55, i8 73>
}

define <16 x i16> @test_vector_v16i16() {
; AVX-ALL-LABEL: test_vector_v16i16:
; AVX-ALL: # %bb.0:
; AVX-ALL-NEXT: vmovaps {{.*#+}} ymm0 = [2415,4748,23790,5373,22059,21582,12346,30507,9170,21469,12631,24765,31001,26396,24951,27843]
; AVX-ALL-NEXT: retq
ret <16 x i16> <i16 2415, i16 4748, i16 23790, i16 5373, i16 22059, i16 21582, i16 12346, i16 30507, i16 9170, i16 21469, i16 12631, i16 24765, i16 31001, i16 26396, i16 24951, i16 27843>
}

define <5 x float> @test_vector_v5f32() {
; AVX-ALL-LABEL: test_vector_v5f32:
; AVX-ALL: # %bb.0:
; AVX-ALL-NEXT: vmovaps {{.*#+}} ymm0 = [6.135E+3,2.179E+4,2.8365E+4,6.641E+3,2.6535E+4,u,u,u]
; AVX-ALL-NEXT: retq
ret <5 x float> <float 6135., float 21790., float 28365., float 6641., float 26535.>
}

define <8 x float> @test_vector_v8f32() {
; AVX-ALL-LABEL: test_vector_v8f32:
; AVX-ALL: # %bb.0:
; AVX-ALL-NEXT: vmovaps {{.*#+}} ymm0 = [6.135E+3,2.179E+4,2.8365E+4,6.641E+3,2.6535E+4,2.1447E+4,1.9619E+4,1.1916E+4]
; AVX-ALL-NEXT: retq
ret <8 x float> <float 6135., float 21790., float 28365., float 6641., float 26535., float 21447., float 19619., float 11916.>
}

define <4 x i64> @test_vector_v4i64() {
; AVX-ALL-LABEL: test_vector_v4i64:
; AVX-ALL: # %bb.0:
; AVX-ALL-NEXT: vmovaps {{.*#+}} ymm0 = [23430,24650,1,12]
; AVX-ALL-NEXT: retq
ret <4 x i64> <i64 23430, i64 24650, i64 1, i64 12>
}

;
; 512 bit vectors
;

define <64 x i8> @test_vector_v64i8() {
; AVX-LABEL: test_vector_v64i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,84,15,13,66,11,70,102,12,82,111,109,61,15,70,8,110,17,35,102,57,111,119,61,112,47,3,34,65,126,55,37]
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [9,100,124,46,65,75,68,70,120,109,125,21,98,121,127,13,119,64,2,0,9,79,10,78,53,81,37,95,99,79,114,3]
; AVX-NEXT: retq
;
; AVX512-LABEL: test_vector_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} zmm0 = [0,84,15,13,66,11,70,102,12,82,111,109,61,15,70,8,110,17,35,102,57,111,119,61,112,47,3,34,65,126,55,37,9,100,124,46,65,75,68,70,120,109,125,21,98,121,127,13,119,64,2,0,9,79,10,78,53,81,37,95,99,79,114,3]
; AVX512-NEXT: retq
ret <64 x i8> <i8 0, i8 84, i8 15, i8 13, i8 66, i8 11, i8 70, i8 102, i8 12, i8 82, i8 111, i8 109, i8 61, i8 15, i8 70, i8 8, i8 110, i8 17, i8 35, i8 102, i8 57, i8 111, i8 119, i8 61, i8 112, i8 47, i8 3, i8 34, i8 65, i8 126, i8 55, i8 37, i8 9, i8 100, i8 124, i8 46, i8 65, i8 75, i8 68, i8 70, i8 120, i8 109, i8 125, i8 21, i8 98, i8 121, i8 127, i8 13, i8 119, i8 64, i8 2, i8 0, i8 9, i8 79, i8 10, i8 78, i8 53, i8 81, i8 37, i8 95, i8 99, i8 79, i8 114, i8 3>
}

define <32 x i16> @test_vector_v32i16() {
; AVX-LABEL: test_vector_v32i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [30901,2280,10793,13893,17914,6183,27317,29748,27420,12395,13504,18229,14700,11550,24714,26203]
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [23668,3198,27016,12020,31057,19311,16505,24461,28451,19446,23816,10995,17209,5831,27666,21680]
; AVX-NEXT: retq
;
; AVX512-LABEL: test_vector_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} zmm0 = [30901,2280,10793,13893,17914,6183,27317,29748,27420,12395,13504,18229,14700,11550,24714,26203,23668,3198,27016,12020,31057,19311,16505,24461,28451,19446,23816,10995,17209,5831,27666,21680]
; AVX512-NEXT: retq
ret <32 x i16> <i16 30901, i16 2280, i16 10793, i16 13893, i16 17914, i16 6183, i16 27317, i16 29748, i16 27420, i16 12395, i16 13504, i16 18229, i16 14700, i16 11550, i16 24714, i16 26203, i16 23668, i16 3198, i16 27016, i16 12020, i16 31057, i16 19311, i16 16505, i16 24461, i16 28451, i16 19446, i16 23816, i16 10995, i16 17209, i16 5831, i16 27666, i16 21680>
}

define <16 x i32> @test_vector_v16i32() {
; AVX-LABEL: test_vector_v16i32:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [867316,75798,646113,495494,920699,901516,613751,811205]
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [778508,933022,441446,241046,364018,527717,71828,337100]
; AVX-NEXT: retq
;
; AVX512-LABEL: test_vector_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} zmm0 = [867316,75798,646113,495494,920699,901516,613751,811205,778508,933022,441446,241046,364018,527717,71828,337100]
; AVX512-NEXT: retq
ret <16 x i32> <i32 867316, i32 75798, i32 646113, i32 495494, i32 920699, i32 901516, i32 613751, i32 811205, i32 778508, i32 933022, i32 441446, i32 241046, i32 364018, i32 527717, i32 71828, i32 337100>
}

define <8 x double> @test_vector_v8f64() {
; AVX-LABEL: test_vector_v8f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [6.1349999999999998E+0,2.1789999999999998E+0,2.8365E+0,6.641E+0]
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [2.6535000000000002E+0,2.1446999999999998E+0,1.9619E+0,1.1916E+0]
; AVX-NEXT: retq
;
; AVX512-LABEL: test_vector_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} zmm0 = [6.1349999999999998E+0,2.1789999999999998E+0,2.8365E+0,6.641E+0,2.6535000000000002E+0,2.1446999999999998E+0,1.9619E+0,1.1916E+0]
; AVX512-NEXT: retq
ret <8 x double> <double 6.135, double 2.1790, double 2.8365, double 6.641, double 2.6535, double 2.1447, double 1.9619, double 1.1916>
}