-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Match ZIP and UZP starting from undef elements. #89578
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
In case the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult", meaning they don't match a zip/uzp where they could. This patch alters the matching code to first check for the first non-undef element, to try and get WhichResult correct.
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesIn case the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult", meaning they don't match a zip/uzp where they could. This patch alters the matching code to first check for the first non-undef element, to try and get WhichResult correct. Full diff: https://github.com/llvm/llvm-project/pull/89578.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index 7abaead694d114..a143243a8d3bb3 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6620,11 +6620,28 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
return (PFEntry >> 30) + 1;
}
-inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for zip1 or zip2 masks of the form:
+/// <0, 8, 1, 9, 2, 10, 3, 11> or
+/// <4, 12, 5, 13, 6, 14, 7, 15>
+inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
- WhichResult = (M[0] == 0 ? 0 : 1);
+ // Check the first non-undef element for which half to use.
+ unsigned WhichResult = 2;
+ for (unsigned i = 0; i != NumElts / 2; i++) {
+ if (M[i * 2] >= 0) {
+ WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
+ break;
+ } else if (M[i * 2 + 1] >= 0) {
+ WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
+ break;
+ }
+ }
+ if (WhichResult == 2)
+ return false;
+
+ // Check all elements match.
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
@@ -6632,20 +6649,34 @@ inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
Idx += 1;
}
-
+ WhichResultOut = WhichResult;
return true;
}
-inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+/// Return true for uzp1 or uzp2 masks of the form:
+/// <0, 2, 4, 6, 8, 10, 12, 14> or
+/// <1, 3, 5, 7, 9, 11, 13, 15>
+inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
+ // Check the first non-undef element for which half to use.
+ unsigned WhichResult = 2;
+ for (unsigned i = 0; i != NumElts; i++) {
+ if (M[i] >= 0) {
+ WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1);
+ break;
+ }
+ }
+ if (WhichResult == 2)
+ return false;
+
+ // Check all elements match.
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
-
+ WhichResultOut = WhichResult;
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
index 6e01ebc95a1cb8..49a51d96fbc841 100644
--- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
@@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef0:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: uzp2.8h v3, v0, v1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: add.8h v0, v0, v3
+; CHECK-NEXT: uzp1.8h v2, v0, v1
+; CHECK-NEXT: uzp2.8h v0, v0, v1
+; CHECK-NEXT: add.8h v0, v2, v0
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef01:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: uzp2.8h v3, v0, v1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: add.8h v0, v0, v3
+; CHECK-NEXT: uzp1.8h v2, v0, v1
+; CHECK-NEXT: uzp2.8h v0, v0, v1
+; CHECK-NEXT: add.8h v0, v2, v0
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vuzpQi16_undef012:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: uzp2.8h v3, v0, v1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT: add.8h v0, v0, v3
+; CHECK-NEXT: uzp1.8h v2, v0, v1
+; CHECK-NEXT: uzp2.8h v0, v0, v1
+; CHECK-NEXT: add.8h v0, v2, v0
; CHECK-NEXT: ret
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 349751dda461f9..4c771cbd2966cc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_01:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %s
@@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %s
@@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_012:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %s
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice change, thank you.
In case the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult", meaning they don't match a zip/uzp where they could. This patch alters the matching code to first check for the first non-undef element, to try and get WhichResult correct.