Skip to content

[WebAssembly] load_zero to initialise build_vector #100610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2024

Conversation

sparker-arm
Copy link
Contributor

Instead of splatting a single lane to initialise a build_vector, lower to scalar_to_vector, which can be selected to load_zero.

Also add the patterns for f32x4 and f64x2.

@llvmbot
Copy link
Member

llvmbot commented Jul 25, 2024

@llvm/pr-subscribers-backend-webassembly

Author: Sam Parker (sparker-arm)

Changes

Instead of splatting a single lane to initialise a build_vector, lower to scalar_to_vector, which can be selected to load_zero.

Also add the patterns for f32x4 and f64x2.


Full diff: https://github.com/llvm/llvm-project/pull/100610.diff

4 Files Affected:

  • (modified) llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h (+2-2)
  • (modified) llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (+9-2)
  • (modified) llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td (+7-7)
  • (modified) llvm/test/CodeGen/WebAssembly/simd-build-vector.ll (+73)
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 7f1a5f616ed48..cdc9d9e2e3ec0 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -238,7 +238,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   WASM_LOAD_STORE(MEMORY_ATOMIC_NOTIFY)
   WASM_LOAD_STORE(MEMORY_ATOMIC_WAIT32)
   WASM_LOAD_STORE(LOAD32_SPLAT)
-  WASM_LOAD_STORE(LOAD_ZERO_I32x4)
+  WASM_LOAD_STORE(LOAD_ZERO_32)
   WASM_LOAD_STORE(LOAD_LANE_I32x4)
   WASM_LOAD_STORE(STORE_LANE_I32x4)
   return 2;
@@ -263,7 +263,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   WASM_LOAD_STORE(LOAD_EXTEND_U_I32x4)
   WASM_LOAD_STORE(LOAD_EXTEND_S_I64x2)
   WASM_LOAD_STORE(LOAD_EXTEND_U_I64x2)
-  WASM_LOAD_STORE(LOAD_ZERO_I64x2)
+  WASM_LOAD_STORE(LOAD_ZERO_64)
   WASM_LOAD_STORE(LOAD_LANE_I64x2)
   WASM_LOAD_STORE(STORE_LANE_I64x2)
   return 3;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index f77076d7244ca..960ef90148095 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2275,8 +2275,15 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
       return IsConstant(Lane);
     };
   } else {
-    // Use a splat (which might be selected as a load splat)
-    Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
+    if (NumSplatLanes == 1 && (DestLaneSize == 32 || DestLaneSize == 64)) {
+      // Could be selected to load_zero.
+      assert(SplatValue == Op->getOperand(0));
+      Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
+    } else {
+      // Use a splat (which might be selected as a load splat)
+      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+    }
     IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
       return Lane == SplatValue;
     };
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 26fe61b1d6051..76fde44a3f63c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -273,13 +273,13 @@ defm : LoadPat<vec.vt, loadpat, inst>;
 multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
   defvar name = "v128.load"#vec.lane_bits#"_zero";
   let mayLoad = 1, UseNamedOperandTable = 1 in {
-  defm LOAD_ZERO_#vec#_A32 :
+  defm LOAD_ZERO_#vec.lane_bits#_A32 :
     SIMD_I<(outs V128:$dst),
            (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
            (outs), (ins P2Align:$p2align, offset32_op:$off), [],
            name#"\t$dst, ${off}(${addr})$p2align",
            name#"\t$off$p2align", simdop>;
-  defm LOAD_ZERO_#vec#_A64 :
+  defm LOAD_ZERO_#vec.lane_bits#_A64 :
     SIMD_I<(outs V128:$dst),
            (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
            (outs), (ins P2Align:$p2align, offset64_op:$off), [],
@@ -293,15 +293,15 @@ defm "" : SIMDLoadZero<I64x2, 0x5d>;
 
 // Use load_zero to load scalars into vectors as well where possible.
 // TODO: i16, and i8 scalars
-foreach vec = [I32x4, I64x2] in {
-  defvar inst = "LOAD_ZERO_"#vec;
+foreach vec = [I32x4, I64x2, F32x4, F64x2] in {
+  defvar inst = "LOAD_ZERO_"#vec.lane_bits;
   defvar pat = PatFrag<(ops node:$addr), (scalar_to_vector (vec.lane_vt (load $addr)))>;
   defm : LoadPat<vec.vt, pat, inst>;
 }
 
 // TODO: f32x4 and f64x2 as well
 foreach vec = [I32x4, I64x2] in {
-  defvar inst = "LOAD_ZERO_"#vec;
+  defvar inst = "LOAD_ZERO_"#vec.lane_bits;
   defvar pat = PatFrag<(ops node:$ptr),
     (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
   defm : LoadPat<vec.vt, pat, inst>;
@@ -1463,10 +1463,10 @@ def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
 // Adapted from the body of LoadPatNoOffset
 // TODO: other addressing patterns
 def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))),
-          (promote_low_F64x2 (LOAD_ZERO_I64x2_A32 0, 0, I32:$addr))>,
+          (promote_low_F64x2 (LOAD_ZERO_64_A32 0, 0, I32:$addr))>,
       Requires<[HasAddr32]>;
 def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
-          (promote_low_F64x2 (LOAD_ZERO_I64x2_A64 0, 0, I64:$addr))>,
+          (promote_low_F64x2 (LOAD_ZERO_64_A64 0, 0, I64:$addr))>,
       Requires<[HasAddr64]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
index a51b358de2e89..7075e21ccf0c8 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -440,3 +440,76 @@ define <2 x double> @all_undef_f64x2() {
 ; CHECK-NEXT:    return $0
   ret <2 x double> undef
 }
+
+define <4 x i32> @load_zero_lane_i32x4(ptr %addr.a, ptr %addr.b, ptr %addr.c, ptr %addr.d) {
+; CHECK-LABEL: load_zero_lane_i32x4:
+; CHECK:         .functype load_zero_lane_i32x4 (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.load32_zero $push0=, 0($0)
+; CHECK-NEXT:    v128.load32_lane $push1=, 0($1), $pop0, 1
+; CHECK-NEXT:    v128.load32_lane $push2=, 0($2), $pop1, 2
+; CHECK-NEXT:    v128.load32_lane $push3=, 0($3), $pop2, 3
+; CHECK-NEXT:    return $pop3
+  %a = load i32, ptr %addr.a
+  %b = load i32, ptr %addr.b
+  %c = load i32, ptr %addr.c
+  %d = load i32, ptr %addr.d
+  %v = insertelement <4 x i32> undef, i32 %a, i32 0
+  %v.1 = insertelement <4 x i32> %v, i32 %b, i32 1
+  %v.2 = insertelement <4 x i32> %v.1, i32 %c, i32 2
+  %v.3 = insertelement <4 x i32> %v.2, i32 %d, i32 3
+  ret <4 x i32> %v.3
+}
+
+define <2 x i64> @load_zero_lane_i64x2(ptr %addr.a, ptr %addr.b) {
+; CHECK-LABEL: load_zero_lane_i64x2:
+; CHECK:         .functype load_zero_lane_i64x2 (i32, i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.load64_zero $push0=, 0($0)
+; CHECK-NEXT:    v128.load64_lane $push1=, 0($1), $pop0, 1
+; CHECK-NEXT:    return $pop1
+  %a = load i64, ptr %addr.a
+  %b = load i64, ptr %addr.b
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %v.1 = insertelement <2 x i64> %v, i64 %b, i32 1
+  ret <2 x i64> %v.1
+}
+
+define <4 x float> @load_zero_lane_f32x4(ptr %addr.a, ptr %addr.b, ptr %addr.c, ptr %addr.d) {
+; CHECK-LABEL: load_zero_lane_f32x4:
+; CHECK:         .functype load_zero_lane_f32x4 (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.load32_zero $push1=, 0($0)
+; CHECK-NEXT:    f32.load $push0=, 0($1)
+; CHECK-NEXT:    f32x4.replace_lane $push2=, $pop1, 1, $pop0
+; CHECK-NEXT:    f32.load $push3=, 0($2)
+; CHECK-NEXT:    f32x4.replace_lane $push4=, $pop2, 2, $pop3
+; CHECK-NEXT:    f32.load $push5=, 0($3)
+; CHECK-NEXT:    f32x4.replace_lane $push6=, $pop4, 3, $pop5
+; CHECK-NEXT:    return $pop6
+  %a = load float, ptr %addr.a
+  %b = load float, ptr %addr.b
+  %c = load float, ptr %addr.c
+  %d = load float, ptr %addr.d
+  %v = insertelement <4 x float> undef, float %a, i32 0
+  %v.1 = insertelement <4 x float> %v, float %b, i32 1
+  %v.2 = insertelement <4 x float> %v.1, float %c, i32 2
+  %v.3 = insertelement <4 x float> %v.2, float %d, i32 3
+  ret <4 x float> %v.3
+}
+
+define <2 x double> @load_zero_lane_f64x2(ptr %addr.a, ptr %addr.b) {
+; CHECK-LABEL: load_zero_lane_f64x2:
+; CHECK:         .functype load_zero_lane_f64x2 (i32, i32) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.load64_zero $push1=, 0($0)
+; CHECK-NEXT:    f64.load $push0=, 0($1)
+; CHECK-NEXT:    f64x2.replace_lane $push2=, $pop1, 1, $pop0
+; CHECK-NEXT:    return $pop2
+  %a = load double, ptr %addr.a
+  %b = load double, ptr %addr.b
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %v.1 = insertelement <2 x double> %v, double %b, i32 1
+  ret <2 x double> %v.1
+}
+

Instead of splatting a single lane to initialise a build_vector,
lower to scalar_to_vector, which can be selected to load_zero.

Also add the patterns for load_zero and load_lane for f32x4 and
f64x2.
@sparker-arm sparker-arm force-pushed the load-zero-build-vector branch from 3e9b034 to 21d900d Compare August 1, 2024 14:53
Copy link
Collaborator

@tlively tlively left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good! I guess the load_zero is generally more efficient than the load_splat?

@sparker-arm
Copy link
Contributor Author

I guess the load_zero is generally more efficient than the load_splat?

Yes, it should be more easily codegen'd to a scalar load of some sort. I noticed on my Arm box that these were quite costly in numerical loops. These are some of the uplifts I saw on V8:

polybench-2mm                    8.325%
polybench-atax                   3.03%
polybench-correlation            2.88%
polybench-covariance             1.773%
polybench-doitgen                0.833%
polybench-gemver                 1.792%
polybench-ludcmp                 5.075%
polybench-mvt                    2.703%

@sparker-arm sparker-arm merged commit 08decd2 into llvm:main Aug 2, 2024
7 checks passed
@sbc100
Copy link
Collaborator

sbc100 commented Aug 4, 2024

This is causing the LLVM roller in Emscripten to fail because the tests test_avx, test_sse4_1 and test_sse2 are failing:

See https://ci.chromium.org/ui/p/emscripten-releases/builders/try/linux/b8740537667603659377/overview and
https://logs.chromium.org/logs/emscripten-releases/buildbucket/cr-buildbucket/8740537667603659377/+/u/Emscripten_testsuite__cores_/stdout

test_avx fails like this:

$ ./test/runner cores.test_avx
ninja: Entering directory `/usr/local/google/home/sbc/dev/wasm/llvm-build'
[414/414] Creating library symlink lib/libclang-cpp.so
Test suites:
['test_core']
Running test_core: (1 tests)
(checking sanity from test runner)
shared:INFO: (Emscripten: Running sanity checks)
test_avx (test_core.cores.test_avx) ... clang++: /usr/local/google/home/sbc/dev/wasm/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp:2281: SDValue llvm::WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue, SelectionDAG &) const: Assertion `SplatValue == Op->getOperand(0)' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /usr/local/google/home/sbc/dev/wasm/llvm-build/bin/clang++ -target wasm32-unknown-emscripten -fignore-exceptions -mllvm -combiner-global-alias-analysis=false -mllvm -enable-emscripten-sjlj -mllvm -disable-lsr --sysroot=/usr/local/google/home/sbc/dev/wasm/emscripten/cache/sysroot -DEMSCRIPTEN -D__SSE__=1 -D__SSE2__=1 -D__SSE3__=1 -D__SSSE3__=1 -D__SSE4_1__=1 -D__SSE4_2__=1 -D__AVX__=1 -Xclang -iwithsysroot/include/fakesdl -Xclang -iwithsysroot/include/compat -Werror -Os -msimd128 -fno-lax-vector-conversions -I/usr/local/google/home/sbc/dev/wasm/emscripten/test/sse -Wno-argument-outside-range /usr/local/google/home/sbc/dev/wasm/emscripten/test/sse/test_avx.cpp -c -o /tmp/emscripten_temp_hdrppc_7/test_avx_0.o
1.	<eof> parser at end of file
2.	Code generation
3.	Running pass 'Function Pass Manager' on module '/usr/local/google/home/sbc/dev/wasm/emscripten/test/sse/test_avx.cpp'.
4.	Running pass 'WebAssembly Instruction Selection' on function '@__original_main'
 #0 0x00007f494dd87fa8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMSupport.so.20.0git+0x187fa8)
 #1 0x00007f494dd85ade llvm::sys::RunSignalHandlers() (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMSupport.so.20.0git+0x185ade)
 #2 0x00007f494dd87371 llvm::sys::CleanupOnSignal(unsigned long) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMSupport.so.20.0git+0x187371)
 #3 0x00007f494dcc4636 CrashRecoverySignalHandler(int) CrashRecoveryContext.cpp:0:0
 #4 0x00007f494ec591a0 (/lib/x86_64-linux-gnu/libc.so.6+0x3d1a0)
 #5 0x00007f494eca70ec __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #6 0x00007f494ec59102 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #7 0x00007f494ec424f2 abort ./stdlib/abort.c:81:7
 #8 0x00007f494ec42415 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #9 0x00007f494ec51d32 (/lib/x86_64-linux-gnu/libc.so.6+0x35d32)
#10 0x00007f4952f4ae9d (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMWebAssemblyCodeGen.so.20.0git+0x82e9d)
#11 0x00007f4952f45ec6 llvm::WebAssemblyTargetLowering::LowerOperation(llvm::SDValue, llvm::SelectionDAG&) const (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMWebAssemblyCodeGen.so.20.0git+0x7dec6)
#12 0x00007f494d72791c (anonymous namespace)::SelectionDAGLegalize::LegalizeOp(llvm::SDNode*) LegalizeDAG.cpp:0:0
#13 0x00007f494d726a66 llvm::SelectionDAG::Legalize() (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/../lib/libLLVMSelectionDAG.so.20.0git+0x126a66)
#14 0x00007f494d8e7b8b llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/../lib/libLLVMSelectionDAG.so.20.0git+0x2e7b8b)
#15 0x00007f494d8e5d03 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/../lib/libLLVMSelectionDAG.so.20.0git+0x2e5d03)
#16 0x00007f494d8e30d1 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/../lib/libLLVMSelectionDAG.so.20.0git+0x2e30d1)
#17 0x00007f494d8e08f1 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/../lib/libLLVMSelectionDAG.so.20.0git+0x2e08f1)
#18 0x00007f4951b5fea7 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMCodeGen.so.20.0git+0x35fea7)
#19 0x00007f494e29480a llvm::FPPassManager::runOnFunction(llvm::Function&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMCore.so.20.0git+0x29480a)
#20 0x00007f494e29cec2 llvm::FPPassManager::runOnModule(llvm::Module&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMCore.so.20.0git+0x29cec2)
#21 0x00007f494e29530c llvm::legacy::PassManagerImpl::run(llvm::Module&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMCore.so.20.0git+0x29530c)
#22 0x00007f49522ee0a6 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::__2::unique_ptr<llvm::raw_pwrite_stream, std::__2::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangCodeGen.so.20.0git+0xee0a6)
#23 0x00007f49527374ef clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangCodeGen.so.20.0git+0x5374ef)
#24 0x00007f494b9b9389 clang::ParseAST(clang::Sema&, bool, bool) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/../lib/libclangParse.so.20.0git+0x39389)
#25 0x00007f495093ee5f clang::FrontendAction::Execute() (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangFrontend.so.20.0git+0x13ee5f)
#26 0x00007f49508a53dd clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangFrontend.so.20.0git+0xa53dd)
#27 0x00007f49534ee648 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangFrontendTool.so.20.0git+0x4648)
#28 0x0000562e2e31c8f3 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/clang+++0x108f3)
#29 0x0000562e2e3190b8 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) driver.cpp:0:0
#30 0x00007f49504d03f9 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::__2::optional<llvm::StringRef>>, std::__2::basic_string<char, std::__2::char_traits<char>, std::__2::allocator<char>>*, bool*) const::$_0>(long) Job.cpp:0:0
#31 0x00007f494dcc437e llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libLLVMSupport.so.20.0git+0xc437e)
#32 0x00007f49504cfbb3 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::__2::optional<llvm::StringRef>>, std::__2::basic_string<char, std::__2::char_traits<char>, std::__2::allocator<char>>*, bool*) const (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangDriver.so.20.0git+0xcfbb3)
#33 0x00007f495048cc1e clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangDriver.so.20.0git+0x8cc1e)
#34 0x00007f495048cf3e clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::__2::pair<int, clang::driver::Command const*>>&, bool) const (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangDriver.so.20.0git+0x8cf3e)
#35 0x00007f49504adc5c clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::__2::pair<int, clang::driver::Command const*>>&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/../lib/libclangDriver.so.20.0git+0xadc5c)
#36 0x0000562e2e3185ee clang_main(int, char**, llvm::ToolContext const&) (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/clang+++0xc5ee)
#37 0x0000562e2e326fd7 main (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/clang+++0x1afd7)
#38 0x00007f494ec43b8a __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#39 0x00007f494ec43c45 call_init ./csu/../csu/libc-start.c:128:20
#40 0x00007f494ec43c45 __libc_start_main ./csu/../csu/libc-start.c:347:5
#41 0x0000562e2e316b8a _start (/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/clang+++0xab8a)
clang++: error: clang frontend command failed with exit code 134 (use -v to see invocation)
clang version 20.0.0git (https://github.com/sbc100/llvm-project 08decd20a968ddec459376ef64a4987ef74e15dc)
Target: wasm32-unknown-emscripten
Thread model: posix
InstalledDir: /usr/local/google/home/sbc/dev/wasm/llvm-build/bin
Build config: +assertions
clang++: note: diagnostic msg: 
********************

PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:
Preprocessed source(s) and associated run script(s) are located at:
clang++: note: diagnostic msg: /tmp/test_avx-385bbb.cpp
clang++: note: diagnostic msg: /tmp/test_avx-385bbb.sh
clang++: note: diagnostic msg: 

********************
em++: error: '/usr/local/google/home/sbc/dev/wasm/llvm-build/bin/clang++ -target wasm32-unknown-emscripten -fignore-exceptions -mllvm -combiner-global-alias-analysis=false -mllvm -enable-emscripten-sjlj -mllvm -disable-lsr --sysroot=/usr/local/google/home/sbc/dev/wasm/emscripten/cache/sysroot -DEMSCRIPTEN -D__SSE__=1 -D__SSE2__=1 -D__SSE3__=1 -D__SSSE3__=1 -D__SSE4_1__=1 -D__SSE4_2__=1 -D__AVX__=1 -Xclang -iwithsysroot/include/fakesdl -Xclang -iwithsysroot/include/compat -Werror -Os -msimd128 -fno-lax-vector-conversions -I/usr/local/google/home/sbc/dev/wasm/emscripten/test/sse -Wno-argument-outside-range /usr/local/google/home/sbc/dev/wasm/emscripten/test/sse/test_avx.cpp -c -o /tmp/emscripten_temp_hdrppc_7/test_avx_0.o' failed (returned 1)

test.zip

@sparker-arm
Copy link
Contributor Author

Okay, it's hitting the assertion I added. I'll take a look today.

@sparker-arm
Copy link
Contributor Author

Proposed fix: #101961

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants