@@ -12,91 +12,17 @@ define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
12
12
; APPLE-LABEL: define void @small_load_store_loop(
13
13
; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
14
14
; APPLE-NEXT: [[ENTRY:.*]]:
15
- ; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
16
- ; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7
17
- ; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
18
- ; APPLE-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]]
19
- ; APPLE: [[ENTRY_NEW]]:
20
- ; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
21
15
; APPLE-NEXT: br label %[[LOOP:.*]]
22
16
; APPLE: [[LOOP]]:
23
- ; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ]
24
- ; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[LOOP]] ]
17
+ ; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP]] ]
25
18
; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL]], [[SCALE]]
26
19
; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
27
20
; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
28
21
; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL]]
29
22
; APPLE-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4
30
- ; APPLE-NEXT: [[IV_NEXT_EPIL:%.*]] = add nuw nsw i64 [[IV_EPIL]], 1
31
- ; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT_EPIL]], [[SCALE]]
32
- ; APPLE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
33
- ; APPLE-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
34
- ; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_EPIL]]
35
- ; APPLE-NEXT: store float [[L_1]], ptr [[GEP_DST_1]], align 4
36
- ; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_EPIL]], 2
37
- ; APPLE-NEXT: [[SCALED_IV_2:%.*]] = mul nuw nsw i64 [[IV_NEXT_1]], [[SCALE]]
38
- ; APPLE-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_2]]
39
- ; APPLE-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
40
- ; APPLE-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_1]]
41
- ; APPLE-NEXT: store float [[L_2]], ptr [[GEP_DST_2]], align 4
42
- ; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_EPIL]], 3
43
- ; APPLE-NEXT: [[SCALED_IV_3:%.*]] = mul nuw nsw i64 [[IV_NEXT_2]], [[SCALE]]
44
- ; APPLE-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_3]]
45
- ; APPLE-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_SRC_3]], align 4
46
- ; APPLE-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_2]]
47
- ; APPLE-NEXT: store float [[L_3]], ptr [[GEP_DST_3]], align 4
48
- ; APPLE-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_EPIL]], 4
49
- ; APPLE-NEXT: [[SCALED_IV_4:%.*]] = mul nuw nsw i64 [[IV_NEXT_3]], [[SCALE]]
50
- ; APPLE-NEXT: [[GEP_SRC_4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_4]]
51
- ; APPLE-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_SRC_4]], align 4
52
- ; APPLE-NEXT: [[GEP_DST_4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_3]]
53
- ; APPLE-NEXT: store float [[L_4]], ptr [[GEP_DST_4]], align 4
54
- ; APPLE-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV_EPIL]], 5
55
- ; APPLE-NEXT: [[SCALED_IV_5:%.*]] = mul nuw nsw i64 [[IV_NEXT_4]], [[SCALE]]
56
- ; APPLE-NEXT: [[GEP_SRC_5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_5]]
57
- ; APPLE-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_SRC_5]], align 4
58
- ; APPLE-NEXT: [[GEP_DST_5:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_4]]
59
- ; APPLE-NEXT: store float [[L_5]], ptr [[GEP_DST_5]], align 4
60
- ; APPLE-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV_EPIL]], 6
61
- ; APPLE-NEXT: [[SCALED_IV_6:%.*]] = mul nuw nsw i64 [[IV_NEXT_5]], [[SCALE]]
62
- ; APPLE-NEXT: [[GEP_SRC_6:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_6]]
63
- ; APPLE-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_SRC_6]], align 4
64
- ; APPLE-NEXT: [[GEP_DST_6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_5]]
65
- ; APPLE-NEXT: store float [[L_6]], ptr [[GEP_DST_6]], align 4
66
- ; APPLE-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV_EPIL]], 7
67
- ; APPLE-NEXT: [[SCALED_IV_7:%.*]] = mul nuw nsw i64 [[IV_NEXT_6]], [[SCALE]]
68
- ; APPLE-NEXT: [[GEP_SRC_7:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_7]]
69
- ; APPLE-NEXT: [[L_7:%.*]] = load float, ptr [[GEP_SRC_7]], align 4
70
- ; APPLE-NEXT: [[GEP_DST_7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_6]]
71
- ; APPLE-NEXT: store float [[L_7]], ptr [[GEP_DST_7]], align 4
72
- ; APPLE-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV_EPIL]], 8
73
- ; APPLE-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
74
- ; APPLE-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
75
- ; APPLE-NEXT: br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]]
76
- ; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
77
- ; APPLE-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ]
78
- ; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
79
- ; APPLE: [[EXIT_UNR_LCSSA]]:
80
- ; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
81
- ; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
82
- ; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
83
- ; APPLE: [[LOOP_EPIL_PREHEADER]]:
84
- ; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
85
- ; APPLE: [[LOOP_EPIL]]:
86
- ; APPLE-NEXT: [[IV_EPIL1:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL1:%.*]], %[[LOOP_EPIL]] ]
87
- ; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
88
- ; APPLE-NEXT: [[SCALED_IV_EPIL1:%.*]] = mul nuw nsw i64 [[IV_EPIL1]], [[SCALE]]
89
- ; APPLE-NEXT: [[GEP_SRC_EPIL1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL1]]
90
- ; APPLE-NEXT: [[L_EPIL1:%.*]] = load float, ptr [[GEP_SRC_EPIL1]], align 4
91
- ; APPLE-NEXT: [[GEP_DST_EPIL1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL1]]
92
- ; APPLE-NEXT: store float [[L_EPIL1]], ptr [[GEP_DST_EPIL1]], align 4
93
- ; APPLE-NEXT: [[IV_NEXT_EPIL1]] = add nuw nsw i64 [[IV_EPIL1]], 1
94
- ; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL1]], [[N]]
95
- ; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
96
- ; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
97
- ; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
98
- ; APPLE: [[EXIT_EPILOG_LCSSA]]:
99
- ; APPLE-NEXT: br label %[[EXIT]]
23
+ ; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
24
+ ; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
25
+ ; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP]]
100
26
; APPLE: [[EXIT]]:
101
27
; APPLE-NEXT: ret void
102
28
;
0 commit comments