Skip to content

[SLP][NFC] Add freeze instruction tests for upcoming support #102215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

jmciver
Copy link
Contributor

@jmciver jmciver commented Aug 6, 2024

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 6, 2024

@llvm/pr-subscribers-llvm-transforms

Author: John McIver (jmciver)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/102215.diff

2 Files Affected:

  • (modified) llvm/test/Transforms/SLPVectorizer/X86/addsub.ll (+101-20)
  • (modified) llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll (+31)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
index 5f8941e9f8893..530643a029a56 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -65,6 +65,87 @@ entry:
   ret void
 }
 
+define void @addsub_freeze() #0 {
+; CHECK-LABEL: @addsub_freeze(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @d, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @e, align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[ADD1]]
+; CHECK-NEXT:    [[FREEZE_ADD2:%.*]] = freeze i32 [[ADD2]]
+; CHECK-NEXT:    store i32 [[FREEZE_ADD2]], ptr @a, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[ADD3]], [[ADD4]]
+; CHECK-NEXT:    [[FREEZE_SUB:%.*]] = freeze i32 [[SUB]]
+; CHECK-NEXT:    store i32 [[FREEZE_SUB]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[ADD6]]
+; CHECK-NEXT:    [[FREEZE_ADD7:%.*]] = freeze i32 [[ADD7]]
+; CHECK-NEXT:    store i32 [[FREEZE_ADD7]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[ADD8]], [[ADD9]]
+; CHECK-NEXT:    [[FREEZE_SUB10:%.*]] = freeze i32 [[SUB10]]
+; CHECK-NEXT:    store i32 [[FREEZE_SUB10]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, ptr @b, align 4
+  %1 = load i32, ptr @c, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, ptr @d, align 4
+  %3 = load i32, ptr @e, align 4
+  %add1 = add nsw i32 %2, %3
+  %add2 = add nsw i32 %add, %add1
+  %freeze.add2 = freeze i32 %add2
+  store i32 %freeze.add2, ptr @a, align 4
+  %4 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
+  %5 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
+  %add3 = add nsw i32 %4, %5
+  %6 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
+  %7 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
+  %add4 = add nsw i32 %6, %7
+  %sub = sub nsw i32 %add3, %add4
+  %freeze.sub = freeze i32 %sub
+  store i32 %freeze.sub, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
+  %8 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
+  %9 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
+  %add5 = add nsw i32 %8, %9
+  %10 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
+  %11 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
+  %add6 = add nsw i32 %10, %11
+  %add7 = add nsw i32 %add5, %add6
+  %freeze.add7 = freeze i32 %add7
+  store i32 %freeze.add7, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
+  %12 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
+  %13 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
+  %add8 = add nsw i32 %12, %13
+  %14 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
+  %15 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
+  %add9 = add nsw i32 %14, %15
+  %sub10 = sub nsw i32 %add8, %add9
+  %freeze.sub10 = freeze i32 %sub10
+  store i32 %freeze.sub10, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+  ret void
+}
+
 ; Function Attrs: nounwind uwtable
 define void @subadd() #0 {
 ; CHECK-LABEL: @subadd(
@@ -301,14 +382,14 @@ define void @reorder_alt_subTree() #0 {
 
 define void @reorder_alt_rightsubTree(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %d) {
 ; CHECK-LABEL: @reorder_alt_rightsubTree(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[D:%.*]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP7]], [[TMP2]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[D:%.*]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %1 = load double, ptr %a
@@ -332,20 +413,20 @@ define void @reorder_alt_rightsubTree(ptr nocapture %c, ptr noalias nocapture re
 
 define void @vec_shuff_reorder() #0 {
 ; CHECK-LABEL: @vec_shuff_reorder(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @fa, align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr @fb, align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @fa, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @fb, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd <4 x float> [[TMP10]], [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = fsub <4 x float> [[TMP10]], [[TMP14]]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP17]], ptr @fc, align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fsub <4 x float> [[TMP7]], [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    store <4 x float> [[TMP13]], ptr @fc, align 4
 ; CHECK-NEXT:    ret void
 ;
   %1 = load float, ptr @fb, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
index 28e837c2d7a4e..400d1ac38faba 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
@@ -46,6 +46,37 @@ define void @fmuladd_2f64() #0 {
   ret void
 }
 
+define void @fmuladd_2f64_freeze() #0 {
+; CHECK-LABEL: @fmuladd_2f64_freeze(
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr @srcA64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load double, ptr @srcB64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[C0:%.*]] = load double, ptr @srcC64, align 8
+; CHECK-NEXT:    [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[FMULADD0:%.*]] = call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[C0]])
+; CHECK-NEXT:    [[FMULADD1:%.*]] = call double @llvm.fmuladd.f64(double [[A1]], double [[B1]], double [[C1]])
+; CHECK-NEXT:    [[FREEZE0:%.*]] = freeze double [[FMULADD0]]
+; CHECK-NEXT:    [[FREEZE1:%.*]] = freeze double [[FMULADD1]]
+; CHECK-NEXT:    store double [[FREEZE0]], ptr @dst64, align 8
+; CHECK-NEXT:    store double [[FREEZE1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT:    ret void
+;
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %c0 = load double, ptr @srcC64, align 8
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+  %fmuladd0 = call double @llvm.fmuladd.f64(double %a0, double %b0, double %c0)
+  %fmuladd1 = call double @llvm.fmuladd.f64(double %a1, double %b1, double %c1)
+  %freeze0 = freeze double %fmuladd0
+  %freeze1 = freeze double %fmuladd1
+  store double %freeze0, ptr @dst64, align 8
+  store double %freeze1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  ret void
+}
+
 define void @fmuladd_4f64() #0 {
 ; SSE-LABEL: @fmuladd_4f64(
 ; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8

@alexey-bataev alexey-bataev changed the title [SLP][test] Implement freeze instruction tests for upcoming support [SLP][NFC] Add freeze instruction tests for upcoming support Aug 6, 2024
Copy link
Member

@alexey-bataev alexey-bataev left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

@nunoplopes nunoplopes merged commit ad8a2e4 into llvm:main Aug 7, 2024
9 checks passed
@jmciver jmciver deleted the development/jmciver/slp-vectorizer-with-freeze-part1 branch August 7, 2024 20:11
jmciver added a commit to jmciver/llvm-project that referenced this pull request Aug 7, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants