Skip to content

Commit d3bd54b

Browse files
JamesChesterman authored and paulhuggett committed
[AArch64][SVE] Add dot product codegen for partial reductions with no binary operation on input (llvm#120207)
Add codegen for when the input type has 4 times as many elements as the output type and the input to the partial reduction does not have a binary operation performed on it.
1 parent 6bee566 commit d3bd54b

File tree

3 files changed

+483
-17
lines changed

3 files changed

+483
-17
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21987,21 +21987,35 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
2198721987
SDLoc DL(N);
2198821988

2198921989
SDValue Op2 = N->getOperand(2);
21990-
if (Op2->getOpcode() != ISD::MUL ||
21991-
!ISD::isExtOpcode(Op2->getOperand(0)->getOpcode()) ||
21992-
!ISD::isExtOpcode(Op2->getOperand(1)->getOpcode()))
21993-
return SDValue();
21990+
unsigned Op2Opcode = Op2->getOpcode();
21991+
SDValue MulOpLHS, MulOpRHS;
21992+
bool MulOpLHSIsSigned, MulOpRHSIsSigned;
21993+
if (ISD::isExtOpcode(Op2Opcode)) {
21994+
MulOpLHSIsSigned = MulOpRHSIsSigned = (Op2Opcode == ISD::SIGN_EXTEND);
21995+
MulOpLHS = Op2->getOperand(0);
21996+
MulOpRHS = DAG.getConstant(1, DL, MulOpLHS.getValueType());
21997+
} else if (Op2Opcode == ISD::MUL) {
21998+
SDValue ExtMulOpLHS = Op2->getOperand(0);
21999+
SDValue ExtMulOpRHS = Op2->getOperand(1);
22000+
22001+
unsigned ExtMulOpLHSOpcode = ExtMulOpLHS->getOpcode();
22002+
unsigned ExtMulOpRHSOpcode = ExtMulOpRHS->getOpcode();
22003+
if (!ISD::isExtOpcode(ExtMulOpLHSOpcode) ||
22004+
!ISD::isExtOpcode(ExtMulOpRHSOpcode))
22005+
return SDValue();
2199422006

21995-
SDValue Acc = N->getOperand(1);
21996-
SDValue Mul = N->getOperand(2);
21997-
SDValue ExtMulOpLHS = Mul->getOperand(0);
21998-
SDValue ExtMulOpRHS = Mul->getOperand(1);
22007+
MulOpLHSIsSigned = ExtMulOpLHSOpcode == ISD::SIGN_EXTEND;
22008+
MulOpRHSIsSigned = ExtMulOpRHSOpcode == ISD::SIGN_EXTEND;
2199922009

22000-
SDValue MulOpLHS = ExtMulOpLHS->getOperand(0);
22001-
SDValue MulOpRHS = ExtMulOpRHS->getOperand(0);
22002-
if (MulOpLHS.getValueType() != MulOpRHS.getValueType())
22010+
MulOpLHS = ExtMulOpLHS->getOperand(0);
22011+
MulOpRHS = ExtMulOpRHS->getOperand(0);
22012+
22013+
if (MulOpLHS.getValueType() != MulOpRHS.getValueType())
22014+
return SDValue();
22015+
} else
2200322016
return SDValue();
2200422017

22018+
SDValue Acc = N->getOperand(1);
2200522019
EVT ReducedVT = N->getValueType(0);
2200622020
EVT MulSrcVT = MulOpLHS.getValueType();
2200722021

@@ -22015,8 +22029,6 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
2201522029
!(ReducedVT == MVT::v2i32 && MulSrcVT == MVT::v8i8))
2201622030
return SDValue();
2201722031

22018-
bool MulOpLHSIsSigned = ExtMulOpLHS->getOpcode() == ISD::SIGN_EXTEND;
22019-
bool MulOpRHSIsSigned = ExtMulOpRHS->getOpcode() == ISD::SIGN_EXTEND;
2202022032
// If the extensions are mixed, we should lower it to a usdot instead
2202122033
unsigned Opcode = 0;
2202222034
if (MulOpLHSIsSigned != MulOpRHSIsSigned) {
@@ -22032,10 +22044,8 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
2203222044
// USDOT expects the signed operand to be last
2203322045
if (!MulOpRHSIsSigned)
2203422046
std::swap(MulOpLHS, MulOpRHS);
22035-
} else if (MulOpLHSIsSigned)
22036-
Opcode = AArch64ISD::SDOT;
22037-
else
22038-
Opcode = AArch64ISD::UDOT;
22047+
} else
22048+
Opcode = MulOpLHSIsSigned ? AArch64ISD::SDOT : AArch64ISD::UDOT;
2203922049

2204022050
// Partial reduction lowering for (nx)v16i8 to (nx)v4i64 requires an i32 dot
2204122051
// product followed by a zero / sign extension

llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,166 @@ entry:
367367
ret <4 x i64> %partial.reduce
368368
}
369369

370+
define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
371+
; CHECK-DOT-LABEL: udot_no_bin_op:
372+
; CHECK-DOT: // %bb.0:
373+
; CHECK-DOT-NEXT: movi v2.16b, #1
374+
; CHECK-DOT-NEXT: udot v0.4s, v1.16b, v2.16b
375+
; CHECK-DOT-NEXT: ret
376+
;
377+
; CHECK-NODOT-LABEL: udot_no_bin_op:
378+
; CHECK-NODOT: // %bb.0:
379+
; CHECK-NODOT-NEXT: ushll v2.8h, v1.8b, #0
380+
; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
381+
; CHECK-NODOT-NEXT: ushll v3.4s, v1.4h, #0
382+
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h
383+
; CHECK-NODOT-NEXT: uaddw2 v2.4s, v3.4s, v2.8h
384+
; CHECK-NODOT-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
385+
; CHECK-NODOT-NEXT: add v0.4s, v2.4s, v0.4s
386+
; CHECK-NODOT-NEXT: ret
387+
%a.wide = zext <16 x i8> %a to <16 x i32>
388+
%partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide)
389+
ret <4 x i32> %partial.reduce
390+
}
391+
392+
define <4 x i32> @sdot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
393+
; CHECK-DOT-LABEL: sdot_no_bin_op:
394+
; CHECK-DOT: // %bb.0:
395+
; CHECK-DOT-NEXT: movi v2.16b, #1
396+
; CHECK-DOT-NEXT: sdot v0.4s, v1.16b, v2.16b
397+
; CHECK-DOT-NEXT: ret
398+
;
399+
; CHECK-NODOT-LABEL: sdot_no_bin_op:
400+
; CHECK-NODOT: // %bb.0:
401+
; CHECK-NODOT-NEXT: sshll v2.8h, v1.8b, #0
402+
; CHECK-NODOT-NEXT: sshll2 v1.8h, v1.16b, #0
403+
; CHECK-NODOT-NEXT: sshll v3.4s, v1.4h, #0
404+
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v2.4h
405+
; CHECK-NODOT-NEXT: saddw2 v2.4s, v3.4s, v2.8h
406+
; CHECK-NODOT-NEXT: saddw2 v0.4s, v0.4s, v1.8h
407+
; CHECK-NODOT-NEXT: add v0.4s, v2.4s, v0.4s
408+
; CHECK-NODOT-NEXT: ret
409+
%a.wide = sext <16 x i8> %a to <16 x i32>
410+
%partial.reduce = tail call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide)
411+
ret <4 x i32> %partial.reduce
412+
}
413+
414+
define <2 x i32> @udot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){
415+
; CHECK-DOT-LABEL: udot_no_bin_op_narrow:
416+
; CHECK-DOT: // %bb.0:
417+
; CHECK-DOT-NEXT: movi v2.8b, #1
418+
; CHECK-DOT-NEXT: udot v0.2s, v1.8b, v2.8b
419+
; CHECK-DOT-NEXT: ret
420+
;
421+
; CHECK-NODOT-LABEL: udot_no_bin_op_narrow:
422+
; CHECK-NODOT: // %bb.0:
423+
; CHECK-NODOT-NEXT: ushll v1.8h, v1.8b, #0
424+
; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
425+
; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
426+
; CHECK-NODOT-NEXT: ushll2 v3.4s, v1.8h, #0
427+
; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
428+
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
429+
; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
430+
; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
431+
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
432+
; CHECK-NODOT-NEXT: uaddw v1.4s, v2.4s, v4.4h
433+
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
434+
; CHECK-NODOT-NEXT: ret
435+
%a.wide = zext <8 x i8> %a to <8 x i32>
436+
%partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide)
437+
ret <2 x i32> %partial.reduce
438+
}
439+
440+
define <2 x i32> @sdot_no_bin_op_narrow(<2 x i32> %acc, <8 x i8> %a){
441+
; CHECK-DOT-LABEL: sdot_no_bin_op_narrow:
442+
; CHECK-DOT: // %bb.0:
443+
; CHECK-DOT-NEXT: movi v2.8b, #1
444+
; CHECK-DOT-NEXT: sdot v0.2s, v1.8b, v2.8b
445+
; CHECK-DOT-NEXT: ret
446+
;
447+
; CHECK-NODOT-LABEL: sdot_no_bin_op_narrow:
448+
; CHECK-NODOT: // %bb.0:
449+
; CHECK-NODOT-NEXT: sshll v1.8h, v1.8b, #0
450+
; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
451+
; CHECK-NODOT-NEXT: sshll v2.4s, v1.4h, #0
452+
; CHECK-NODOT-NEXT: sshll2 v3.4s, v1.8h, #0
453+
; CHECK-NODOT-NEXT: ext v4.16b, v1.16b, v1.16b, #8
454+
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
455+
; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
456+
; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
457+
; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
458+
; CHECK-NODOT-NEXT: saddw v1.4s, v2.4s, v4.4h
459+
; CHECK-NODOT-NEXT: add v0.2s, v1.2s, v0.2s
460+
; CHECK-NODOT-NEXT: ret
461+
%a.wide = sext <8 x i8> %a to <8 x i32>
462+
%partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> %acc, <8 x i32> %a.wide)
463+
ret <2 x i32> %partial.reduce
464+
}
465+
466+
define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
467+
; CHECK-DOT-LABEL: udot_no_bin_op_8to64:
468+
; CHECK-DOT: // %bb.0:
469+
; CHECK-DOT-NEXT: movi v3.16b, #1
470+
; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
471+
; CHECK-DOT-NEXT: udot v4.4s, v2.16b, v3.16b
472+
; CHECK-DOT-NEXT: saddw2 v1.2d, v1.2d, v4.4s
473+
; CHECK-DOT-NEXT: saddw v0.2d, v0.2d, v4.2s
474+
; CHECK-DOT-NEXT: ret
475+
;
476+
; CHECK-NODOT-LABEL: udot_no_bin_op_8to64:
477+
; CHECK-NODOT: // %bb.0:
478+
; CHECK-NODOT-NEXT: ushll v3.8h, v2.8b, #0
479+
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
480+
; CHECK-NODOT-NEXT: ushll v4.4s, v3.4h, #0
481+
; CHECK-NODOT-NEXT: ushll v5.4s, v2.4h, #0
482+
; CHECK-NODOT-NEXT: ushll2 v3.4s, v3.8h, #0
483+
; CHECK-NODOT-NEXT: ushll2 v2.4s, v2.8h, #0
484+
; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v4.4s
485+
; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v4.2s
486+
; CHECK-NODOT-NEXT: uaddl2 v4.2d, v3.4s, v5.4s
487+
; CHECK-NODOT-NEXT: uaddl v3.2d, v3.2s, v5.2s
488+
; CHECK-NODOT-NEXT: uaddw2 v1.2d, v1.2d, v2.4s
489+
; CHECK-NODOT-NEXT: uaddw v0.2d, v0.2d, v2.2s
490+
; CHECK-NODOT-NEXT: add v1.2d, v4.2d, v1.2d
491+
; CHECK-NODOT-NEXT: add v0.2d, v3.2d, v0.2d
492+
; CHECK-NODOT-NEXT: ret
493+
%a.wide = zext <16 x i8> %a to <16 x i64>
494+
%partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide)
495+
ret <4 x i64> %partial.reduce
496+
}
497+
498+
define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
499+
; CHECK-DOT-LABEL: sdot_no_bin_op_8to64:
500+
; CHECK-DOT: // %bb.0:
501+
; CHECK-DOT-NEXT: movi v3.16b, #1
502+
; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
503+
; CHECK-DOT-NEXT: sdot v4.4s, v2.16b, v3.16b
504+
; CHECK-DOT-NEXT: saddw2 v1.2d, v1.2d, v4.4s
505+
; CHECK-DOT-NEXT: saddw v0.2d, v0.2d, v4.2s
506+
; CHECK-DOT-NEXT: ret
507+
;
508+
; CHECK-NODOT-LABEL: sdot_no_bin_op_8to64:
509+
; CHECK-NODOT: // %bb.0:
510+
; CHECK-NODOT-NEXT: sshll v3.8h, v2.8b, #0
511+
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
512+
; CHECK-NODOT-NEXT: sshll v4.4s, v3.4h, #0
513+
; CHECK-NODOT-NEXT: sshll v5.4s, v2.4h, #0
514+
; CHECK-NODOT-NEXT: sshll2 v3.4s, v3.8h, #0
515+
; CHECK-NODOT-NEXT: sshll2 v2.4s, v2.8h, #0
516+
; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v4.4s
517+
; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v4.2s
518+
; CHECK-NODOT-NEXT: saddl2 v4.2d, v3.4s, v5.4s
519+
; CHECK-NODOT-NEXT: saddl v3.2d, v3.2s, v5.2s
520+
; CHECK-NODOT-NEXT: saddw2 v1.2d, v1.2d, v2.4s
521+
; CHECK-NODOT-NEXT: saddw v0.2d, v0.2d, v2.2s
522+
; CHECK-NODOT-NEXT: add v1.2d, v4.2d, v1.2d
523+
; CHECK-NODOT-NEXT: add v0.2d, v3.2d, v0.2d
524+
; CHECK-NODOT-NEXT: ret
525+
%a.wide = sext <16 x i8> %a to <16 x i64>
526+
%partial.reduce = tail call <4 x i64> @llvm.experimental.vector.partial.reduce.add.v4i64.v16i64(<4 x i64> %acc, <16 x i64> %a.wide)
527+
ret <4 x i64> %partial.reduce
528+
}
529+
370530
define <4 x i32> @not_udot(<4 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
371531
; CHECK-LABEL: not_udot:
372532
; CHECK: // %bb.0:
@@ -398,3 +558,91 @@ define <2 x i32> @not_udot_narrow(<2 x i32> %acc, <4 x i8> %u, <4 x i8> %s) {
398558
%partial.reduce = tail call <2 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<2 x i32> %acc, <4 x i32> %mult)
399559
ret <2 x i32> %partial.reduce
400560
}
561+
562+
define <2 x i64> @udot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
563+
; CHECK-LABEL: udot_different_types:
564+
; CHECK: // %bb.0: // %entry
565+
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
566+
; CHECK-NEXT: ushll v3.4s, v1.4h, #0
567+
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
568+
; CHECK-NEXT: ushll v4.4s, v2.4h, #0
569+
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
570+
; CHECK-NEXT: umull v5.2d, v1.2s, v2.2s
571+
; CHECK-NEXT: umlal v0.2d, v3.2s, v4.2s
572+
; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.4s
573+
; CHECK-NEXT: umlal2 v5.2d, v3.4s, v4.4s
574+
; CHECK-NEXT: add v0.2d, v5.2d, v0.2d
575+
; CHECK-NEXT: ret
576+
entry:
577+
%a.wide = zext <8 x i16> %a to <8 x i64>
578+
%b.wide = zext <8 x i8> %b to <8 x i64>
579+
%mult = mul nuw nsw <8 x i64> %a.wide, %b.wide
580+
%partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult)
581+
ret <2 x i64> %partial.reduce
582+
}
583+
584+
define <2 x i64> @sdot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
585+
; CHECK-LABEL: sdot_different_types:
586+
; CHECK: // %bb.0: // %entry
587+
; CHECK-NEXT: sshll v2.8h, v2.8b, #0
588+
; CHECK-NEXT: sshll v3.4s, v1.4h, #0
589+
; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
590+
; CHECK-NEXT: sshll v4.4s, v2.4h, #0
591+
; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
592+
; CHECK-NEXT: smull v5.2d, v1.2s, v2.2s
593+
; CHECK-NEXT: smlal v0.2d, v3.2s, v4.2s
594+
; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
595+
; CHECK-NEXT: smlal2 v5.2d, v3.4s, v4.4s
596+
; CHECK-NEXT: add v0.2d, v5.2d, v0.2d
597+
; CHECK-NEXT: ret
598+
entry:
599+
%a.wide = sext <8 x i16> %a to <8 x i64>
600+
%b.wide = sext <8 x i8> %b to <8 x i64>
601+
%mult = mul nuw nsw <8 x i64> %a.wide, %b.wide
602+
%partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult)
603+
ret <2 x i64> %partial.reduce
604+
}
605+
606+
define <2 x i64> @usdot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
607+
; CHECK-LABEL: usdot_different_types:
608+
; CHECK: // %bb.0: // %entry
609+
; CHECK-NEXT: sshll v2.8h, v2.8b, #0
610+
; CHECK-NEXT: ushll v3.4s, v1.4h, #0
611+
; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
612+
; CHECK-NEXT: sshll v4.4s, v2.4h, #0
613+
; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
614+
; CHECK-NEXT: smull v5.2d, v1.2s, v2.2s
615+
; CHECK-NEXT: smlal v0.2d, v3.2s, v4.2s
616+
; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
617+
; CHECK-NEXT: smlal2 v5.2d, v3.4s, v4.4s
618+
; CHECK-NEXT: add v0.2d, v5.2d, v0.2d
619+
; CHECK-NEXT: ret
620+
entry:
621+
%a.wide = zext <8 x i16> %a to <8 x i64>
622+
%b.wide = sext <8 x i8> %b to <8 x i64>
623+
%mult = mul nuw nsw <8 x i64> %a.wide, %b.wide
624+
%partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult)
625+
ret <2 x i64> %partial.reduce
626+
}
627+
628+
define <2 x i64> @sudot_different_types(<2 x i64> %acc, <8 x i16> %a, <8 x i8> %b){
629+
; CHECK-LABEL: sudot_different_types:
630+
; CHECK: // %bb.0: // %entry
631+
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
632+
; CHECK-NEXT: sshll v3.4s, v1.4h, #0
633+
; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
634+
; CHECK-NEXT: ushll v4.4s, v2.4h, #0
635+
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
636+
; CHECK-NEXT: smull v5.2d, v1.2s, v2.2s
637+
; CHECK-NEXT: smlal v0.2d, v3.2s, v4.2s
638+
; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
639+
; CHECK-NEXT: smlal2 v5.2d, v3.4s, v4.4s
640+
; CHECK-NEXT: add v0.2d, v5.2d, v0.2d
641+
; CHECK-NEXT: ret
642+
entry:
643+
%a.wide = sext <8 x i16> %a to <8 x i64>
644+
%b.wide = zext <8 x i8> %b to <8 x i64>
645+
%mult = mul nuw nsw <8 x i64> %a.wide, %b.wide
646+
%partial.reduce = tail call <2 x i64> @llvm.experimental.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> %acc, <8 x i64> %mult)
647+
ret <2 x i64> %partial.reduce
648+
}

0 commit comments

Comments (0)