Skip to content

Commit 06fc125

Browse files
committed
[PGO][PGSO] Add profile guided size optimization tests to X86 ISel Lowering.
1 parent 8372d50 commit 06fc125

File tree

2 files changed

+90
-0
lines changed

2 files changed

+90
-0
lines changed

llvm/test/CodeGen/X86/avx-vperm2x128.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,15 @@ define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
394394
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
395395
ret <4 x double> %s
396396
}
397+
; Profile-annotated counterpart of shuffle_v4f64_zz23_optsize: the same
; shufflevector, but the function carries !prof !14 instead of the optsize
; attribute. Mask <4, 5, 2, 3> takes result elements 0-1 from the second
; operand (whose elements 0-1 are the constant 0.0) and elements 2-3 from %a.
; The assertions below expect a zeroed register (vxorps) blended with %a
; (vblendps) for every check prefix.
define <4 x double> @shuffle_v4f64_zz23_pgso(<4 x double> %a) !prof !14 {
398+
; ALL-LABEL: shuffle_v4f64_zz23_pgso:
399+
; ALL: # %bb.0:
400+
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
401+
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
402+
; ALL-NEXT: retq
403+
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
404+
ret <4 x double> %s
405+
}
397406

398407
define <4 x double> @shuffle_v4f64_zz45(<4 x double> %a) {
399408
; ALL-LABEL: shuffle_v4f64_zz45:
@@ -429,6 +438,15 @@ define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
429438
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
430439
ret <4 x double> %s
431440
}
441+
; Profile-annotated counterpart of shuffle_v4f64_zz67_optsize: the same
; shufflevector, but the function carries !prof !14 instead of the optsize
; attribute. Here the zero constant is the first operand; mask <0, 1, 6, 7>
; takes result elements 0-1 from it (both 0.0) and elements 2-3 from %a
; (second-operand elements 2-3).
; The assertions below expect a zeroed register (vxorps) blended with %a
; (vblendps) for every check prefix.
define <4 x double> @shuffle_v4f64_zz67_pgso(<4 x double> %a) !prof !14 {
442+
; ALL-LABEL: shuffle_v4f64_zz67_pgso:
443+
; ALL: # %bb.0:
444+
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
445+
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
446+
; ALL-NEXT: retq
447+
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
448+
ret <4 x double> %s
449+
}
432450

433451
define <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
434452
; ALL-LABEL: shuffle_v4f64_01zz:
@@ -685,3 +703,20 @@ entry:
685703
%res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
686704
ret <8 x i32> %res
687705
}
706+
707+
; Module-level profile data used by the *_pgso tests in this file.
; !0 registers a "ProfileSummary" module flag whose payload (!1) lists the
; profile format, the aggregate counts and a three-entry detailed summary.
; !14 is a function_entry_count of 0; the *_pgso functions reference it
; through !prof !14.
; NOTE(review): presumably an entry count of 0 makes the profile summary
; treat those functions as cold, which is what triggers profile-guided size
; optimization -- confirm against the ProfileSummaryInfo documentation.
!llvm.module.flags = !{!0}
708+
!0 = !{i32 1, !"ProfileSummary", !1}
709+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
710+
!2 = !{!"ProfileFormat", !"InstrProf"}
711+
!3 = !{!"TotalCount", i64 10000}
712+
!4 = !{!"MaxCount", i64 10}
713+
!5 = !{!"MaxInternalCount", i64 1}
714+
!6 = !{!"MaxFunctionCount", i64 1000}
715+
!7 = !{!"NumCounts", i64 3}
716+
!8 = !{!"NumFunctions", i64 3}
717+
!9 = !{!"DetailedSummary", !10}
718+
!10 = !{!11, !12, !13}
719+
!11 = !{i32 10000, i64 100, i32 1}
720+
!12 = !{i32 999000, i64 100, i32 1}
721+
!13 = !{i32 999999, i64 1, i32 2}
722+
!14 = !{!"function_entry_count", i64 0}

llvm/test/CodeGen/X86/phaddsub-extract.ll

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,6 +2094,44 @@ define i32 @hadd32_4_optsize(<4 x i32> %x225) optsize {
20942094
ret i32 %x230
20952095
}
20962096

2097+
; Profile-annotated variant of the hadd32_4 reduction: the function carries
; !prof !14 rather than an optsize attribute.
; The IR sums the four i32 lanes of %x225 into lane 0:
;   1. shuffle <2, 3, u, u> moves lanes 2-3 down to lanes 0-1, then adds;
;   2. shuffle <1, u, u, u> moves lane 1 down to lane 0, then adds;
;   3. lane 0 of the result is extracted and returned.
; Expectations are given per check prefix. The -FAST prefixes expect two
; horizontal adds; the -SLOW prefixes expect a shuffle plus a plain add
; followed by one horizontal add.
; NOTE(review): the RUN lines that define these prefixes are not visible in
; this excerpt -- confirm which subtarget features each prefix maps to.
define i32 @hadd32_4_pgso(<4 x i32> %x225) !prof !14 {
2098+
; SSE3-SLOW-LABEL: hadd32_4_pgso:
2099+
; SSE3-SLOW: # %bb.0:
2100+
; SSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2101+
; SSE3-SLOW-NEXT: paddd %xmm0, %xmm1
2102+
; SSE3-SLOW-NEXT: phaddd %xmm1, %xmm1
2103+
; SSE3-SLOW-NEXT: movd %xmm1, %eax
2104+
; SSE3-SLOW-NEXT: retq
2105+
;
2106+
; SSE3-FAST-LABEL: hadd32_4_pgso:
2107+
; SSE3-FAST: # %bb.0:
2108+
; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
2109+
; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0
2110+
; SSE3-FAST-NEXT: movd %xmm0, %eax
2111+
; SSE3-FAST-NEXT: retq
2112+
;
2113+
; AVX-SLOW-LABEL: hadd32_4_pgso:
2114+
; AVX-SLOW: # %bb.0:
2115+
; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
2116+
; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
2117+
; AVX-SLOW-NEXT: vphaddd %xmm0, %xmm0, %xmm0
2118+
; AVX-SLOW-NEXT: vmovd %xmm0, %eax
2119+
; AVX-SLOW-NEXT: retq
2120+
;
2121+
; AVX-FAST-LABEL: hadd32_4_pgso:
2122+
; AVX-FAST: # %bb.0:
2123+
; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
2124+
; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
2125+
; AVX-FAST-NEXT: vmovd %xmm0, %eax
2126+
; AVX-FAST-NEXT: retq
2127+
%x226 = shufflevector <4 x i32> %x225, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
2128+
%x227 = add <4 x i32> %x225, %x226
2129+
%x228 = shufflevector <4 x i32> %x227, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
2130+
%x229 = add <4 x i32> %x227, %x228
2131+
%x230 = extractelement <4 x i32> %x229, i32 0
2132+
ret i32 %x230
2133+
}
2134+
20972135
define i32 @hadd32_8_optsize(<8 x i32> %x225) optsize {
20982136
; SSE3-LABEL: hadd32_8_optsize:
20992137
; SSE3: # %bb.0:
@@ -2141,3 +2179,20 @@ define i32 @hadd32_16_optsize(<16 x i32> %x225) optsize {
21412179
%x230 = extractelement <16 x i32> %x229, i32 0
21422180
ret i32 %x230
21432181
}
2182+
2183+
; Module-level profile data used by hadd32_4_pgso in this file.
; !0 registers a "ProfileSummary" module flag whose payload (!1) lists the
; profile format, the aggregate counts and a three-entry detailed summary.
; !14 is a function_entry_count of 0; hadd32_4_pgso references it through
; !prof !14.
; NOTE(review): presumably an entry count of 0 makes the profile summary
; treat that function as cold, which is what triggers profile-guided size
; optimization -- confirm against the ProfileSummaryInfo documentation.
!llvm.module.flags = !{!0}
2184+
!0 = !{i32 1, !"ProfileSummary", !1}
2185+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
2186+
!2 = !{!"ProfileFormat", !"InstrProf"}
2187+
!3 = !{!"TotalCount", i64 10000}
2188+
!4 = !{!"MaxCount", i64 10}
2189+
!5 = !{!"MaxInternalCount", i64 1}
2190+
!6 = !{!"MaxFunctionCount", i64 1000}
2191+
!7 = !{!"NumCounts", i64 3}
2192+
!8 = !{!"NumFunctions", i64 3}
2193+
!9 = !{!"DetailedSummary", !10}
2194+
!10 = !{!11, !12, !13}
2195+
!11 = !{i32 10000, i64 100, i32 1}
2196+
!12 = !{i32 999000, i64 100, i32 1}
2197+
!13 = !{i32 999999, i64 1, i32 2}
2198+
!14 = !{!"function_entry_count", i64 0}

0 commit comments

Comments
 (0)