Skip to content

Commit ffb109b

Browse files
committed
[AArch64][SVE] Support logical operation BIC with DestructiveBinary patterns
Support for the logical operation BIC with DestructiveBinary patterns was temporarily removed because it caused an assert (commit 3c382ed); this change restores it and fixes the assert. The most significant problem is that, for pseudo instructions that do not have real instructions (including movprfx'd ones) covering all combinations of register allocation, the expansion will be broken. This is the main reason zeroing is an experimental feature: it has known bugs. So, when necessary, we add an extra LSL when expanding BIC_ZPZZ_ZERO A, P, A, A with movprfx, producing the sequence: movprfx z0.s, p0/z, z0.s; lsl z0.s, p0/m, z0.s, #0; bic z0.s, p0/m, z0.s, z0.s. Depends on D88595.
1 parent 6890b9b commit ffb109b

File tree

4 files changed

+101
-22
lines changed

4 files changed

+101
-22
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -447,15 +447,11 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
447447
uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
448448
uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
449449
bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
450-
451450
Register DstReg = MI.getOperand(0).getReg();
452451
bool DstIsDead = MI.getOperand(0).isDead();
453-
454-
if (DType == AArch64::DestructiveBinary)
455-
assert(DstReg != MI.getOperand(3).getReg());
456-
457452
bool UseRev = false;
458453
unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
454+
459455
switch (DType) {
460456
case AArch64::DestructiveBinaryComm:
461457
case AArch64::DestructiveBinaryCommWithRev:
@@ -489,12 +485,13 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
489485
llvm_unreachable("Unsupported Destructive Operand type");
490486
}
491487

492-
#ifndef NDEBUG
493488
// MOVPRFX can only be used if the destination operand
494489
// is the destructive operand, not as any other operand,
495490
// so the Destructive Operand must be unique.
496491
bool DOPRegIsUnique = false;
497492
switch (DType) {
493+
case AArch64::DestructiveBinary:
494+
DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
498495
case AArch64::DestructiveBinaryComm:
499496
case AArch64::DestructiveBinaryCommWithRev:
500497
DOPRegIsUnique =
@@ -512,7 +509,6 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
512509
MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
513510
break;
514511
}
515-
#endif
516512

517513
// Resolve the reverse opcode
518514
if (UseRev) {
@@ -527,23 +523,27 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
527523

528524
// Get the right MOVPRFX
529525
uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
530-
unsigned MovPrfx, MovPrfxZero;
526+
unsigned MovPrfx, LSLZero, MovPrfxZero;
531527
switch (ElementSize) {
532528
case AArch64::ElementSizeNone:
533529
case AArch64::ElementSizeB:
534530
MovPrfx = AArch64::MOVPRFX_ZZ;
531+
LSLZero = AArch64::LSL_ZPmI_B;
535532
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
536533
break;
537534
case AArch64::ElementSizeH:
538535
MovPrfx = AArch64::MOVPRFX_ZZ;
536+
LSLZero = AArch64::LSL_ZPmI_H;
539537
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
540538
break;
541539
case AArch64::ElementSizeS:
542540
MovPrfx = AArch64::MOVPRFX_ZZ;
541+
LSLZero = AArch64::LSL_ZPmI_S;
543542
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
544543
break;
545544
case AArch64::ElementSizeD:
546545
MovPrfx = AArch64::MOVPRFX_ZZ;
546+
LSLZero = AArch64::LSL_ZPmI_D;
547547
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
548548
break;
549549
default:
@@ -555,9 +555,10 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
555555
//
556556
MachineInstrBuilder PRFX, DOP;
557557
if (FalseZero) {
558-
#ifndef NDEBUG
559-
assert(DOPRegIsUnique && "The destructive operand should be unique");
560-
#endif
558+
// If we cannot prefix the requested instruction we'll instead emit a
559+
// prefixed_zeroing_mov for DestructiveBinary.
560+
assert((DOPRegIsUnique || AArch64::DestructiveBinary == DType) &&
561+
"The destructive operand should be unique");
561562
assert(ElementSize != AArch64::ElementSizeNone &&
562563
"This instruction is unpredicated");
563564

@@ -569,10 +570,19 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
569570

570571
// After the movprfx, the destructive operand is same as Dst
571572
DOPIdx = 0;
573+
574+
// Create the additional LSL to zero the lanes when the DstReg is not
575+
// unique. Zeros the lanes in z0 that aren't active in p0 with sequence
576+
// movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
577+
if (DType == AArch64::DestructiveBinary && !DOPRegIsUnique) {
578+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
579+
.addReg(DstReg, RegState::Define)
580+
.add(MI.getOperand(PredIdx))
581+
.addReg(DstReg)
582+
.addImm(0);
583+
}
572584
} else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
573-
#ifndef NDEBUG
574585
assert(DOPRegIsUnique && "The destructive operand should be unique");
575-
#endif
576586
PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
577587
.addReg(DstReg, RegState::Define)
578588
.addReg(MI.getOperand(DOPIdx).getReg());
@@ -591,6 +601,7 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
591601
.add(MI.getOperand(PredIdx))
592602
.add(MI.getOperand(SrcIdx));
593603
break;
604+
case AArch64::DestructiveBinary:
594605
case AArch64::DestructiveBinaryImm:
595606
case AArch64::DestructiveBinaryComm:
596607
case AArch64::DestructiveBinaryCommWithRev:

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
433433
defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_orr>;
434434
defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_eor>;
435435
defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_and>;
436-
defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd<null_frag>;
436+
defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_bic>;
437437
} // End HasSVEorSME, UseExperimentalZeroingPseudos
438438

439439
let Predicates = [HasSVEorSME] in {

llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,7 @@ define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64
344344
define <vscale x 16 x i8> @bic_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
345345
; CHECK-LABEL: bic_i8_zero:
346346
; CHECK: // %bb.0:
347-
; CHECK-NEXT: mov z2.b, #0 // =0x0
348-
; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b
347+
; CHECK-NEXT: movprfx z0.b, p0/z, z0.b
349348
; CHECK-NEXT: bic z0.b, p0/m, z0.b, z1.b
350349
; CHECK-NEXT: ret
351350
%a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
@@ -358,8 +357,7 @@ define <vscale x 16 x i8> @bic_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8
358357
define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
359358
; CHECK-LABEL: bic_i16_zero:
360359
; CHECK: // %bb.0:
361-
; CHECK-NEXT: mov z2.h, #0 // =0x0
362-
; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
360+
; CHECK-NEXT: movprfx z0.h, p0/z, z0.h
363361
; CHECK-NEXT: bic z0.h, p0/m, z0.h, z1.h
364362
; CHECK-NEXT: ret
365363
%a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
@@ -372,8 +370,7 @@ define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16
372370
define <vscale x 4 x i32> @bic_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
373371
; CHECK-LABEL: bic_i32_zero:
374372
; CHECK: // %bb.0:
375-
; CHECK-NEXT: mov z2.s, #0 // =0x0
376-
; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
373+
; CHECK-NEXT: movprfx z0.s, p0/z, z0.s
377374
; CHECK-NEXT: bic z0.s, p0/m, z0.s, z1.s
378375
; CHECK-NEXT: ret
379376
%a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
@@ -386,8 +383,7 @@ define <vscale x 4 x i32> @bic_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32
386383
define <vscale x 2 x i64> @bic_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
387384
; CHECK-LABEL: bic_i64_zero:
388385
; CHECK: // %bb.0:
389-
; CHECK-NEXT: mov z2.d, #0 // =0x0
390-
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
386+
; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
391387
; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d
392388
; CHECK-NEXT: ret
393389
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
@@ -397,6 +393,39 @@ define <vscale x 2 x i64> @bic_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64
397393
ret <vscale x 2 x i64> %out
398394
}
399395

396+
; BIC (i.e. A & ~A) is an illegal operation with movprfx, so the codegen depends on the IR before expand-pseudo
397+
define <vscale x 2 x i64> @bic_i64_zero_no_unique_reg(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
398+
; CHECK-LABEL: bic_i64_zero_no_unique_reg:
399+
; CHECK: // %bb.0:
400+
; CHECK-NEXT: mov z1.d, #0 // =0x0
401+
; CHECK-NEXT: mov z1.d, p0/m, z0.d
402+
; CHECK-NEXT: movprfx z0.d, p0/z, z0.d
403+
; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d
404+
; CHECK-NEXT: ret
405+
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
406+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
407+
<vscale x 2 x i64> %a_z,
408+
<vscale x 2 x i64> %a_z)
409+
ret <vscale x 2 x i64> %out
410+
}
411+
412+
; BIC (i.e. A & ~B) is not a commutative operation, so disable it when the
413+
; destination operand is not the destructive operand
414+
define <vscale x 2 x i64> @bic_i64_zero_no_comm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
415+
; CHECK-LABEL: bic_i64_zero_no_comm:
416+
; CHECK: // %bb.0:
417+
; CHECK-NEXT: mov z2.d, #0 // =0x0
418+
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
419+
; CHECK-NEXT: bic z1.d, p0/m, z1.d, z0.d
420+
; CHECK-NEXT: mov z0.d, z1.d
421+
; CHECK-NEXT: ret
422+
%a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
423+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
424+
<vscale x 2 x i64> %b,
425+
<vscale x 2 x i64> %a_z)
426+
ret <vscale x 2 x i64> %out
427+
}
428+
400429
declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
401430
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
402431
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+use-experimental-zeroing-pseudos -run-pass=aarch64-expand-pseudo %s -o - | FileCheck %s
3+
4+
# Should create an additional LSL to zero the lanes as the DstReg is not unique
5+
6+
--- |
7+
define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a){
8+
%a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
9+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a_z, <vscale x 8 x i16> %a_z)
10+
ret <vscale x 8 x i16> %out
11+
}
12+
13+
declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
14+
...
15+
---
16+
name: bic_i16_zero
17+
alignment: 4
18+
tracksRegLiveness: true
19+
tracksDebugUserValues: true
20+
registers: []
21+
liveins:
22+
- { reg: '$p0', virtual-reg: '' }
23+
- { reg: '$z0', virtual-reg: '' }
24+
body: |
25+
bb.0 (%ir-block.0):
26+
liveins: $p0, $z0
27+
28+
; CHECK-LABEL: name: bic_i16_zero
29+
; CHECK: liveins: $p0, $z0
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $z0_hi, implicit killed $p0, implicit $z0 {
32+
; CHECK-NEXT: $z0 = MOVPRFX_ZPzZ_H $p0, $z0
33+
; CHECK-NEXT: $z0 = LSL_ZPmI_H killed renamable $p0, internal $z0, 0
34+
; CHECK-NEXT: $z0 = BIC_ZPmZ_H killed renamable $p0, internal killed $z0, internal killed renamable $z0
35+
; CHECK-NEXT: }
36+
; CHECK-NEXT: RET undef $lr, implicit $z0
37+
renamable $z0 = BIC_ZPZZ_ZERO_H killed renamable $p0, killed renamable $z0, killed renamable $z0
38+
RET_ReallyLR implicit $z0
39+
...

0 commit comments

Comments
 (0)