@@ -28,3 +28,113 @@ entry:
28
28
store double 0 .0 , ptr @g_0
29
29
ret void
30
30
}
31
+
32
; Aggregate of two doubles. fold_addi_from_different_bb indexes field 1 of an
; array element of this type through a getelementptr.
+ %struct.S = type { double , double }
33
+
34
; fold_addi_from_different_bb(k, n, a): returns the sum of n loads of the
; double stored in field 1 of element k of the %struct.S array at %a, calling
; @f(%a) before every load. Returns 0.0 when n <= 0 (signed).
;
; The field address (%y) is computed in the loop preheader while the load that
; uses it lives in the loop body, i.e. in a different basic block. The expected
; output below materialises that address once ("addi s7, a0, 8") and the load
; in the loop uses it with offset 0 ("ld a0, 0(s7)").
;
; NOTE(review): the CHECK lines look like RISC-V output for a 32-bit target
; that keeps doubles in integer registers (4-byte spills, s4/s5 returned in
; a0/a1). The RUN lines are outside this view -- confirm there. The CHECK
; lines are presumably autogenerated; regenerate rather than hand-edit them.
+ define double @fold_addi_from_different_bb (i64 %k , i64 %n , ptr %a ) nounwind {
35
+ ; CHECK-LABEL: fold_addi_from_different_bb:
36
+ ; CHECK: # %bb.0: # %entry
37
+ ; CHECK-NEXT: addi sp, sp, -48
38
+ ; CHECK-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
39
+ ; CHECK-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
40
+ ; CHECK-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
41
+ ; CHECK-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
42
+ ; CHECK-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
43
+ ; CHECK-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
44
+ ; CHECK-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
45
+ ; CHECK-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
46
+ ; CHECK-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
47
+ ; CHECK-NEXT: mv s0, a4
48
+ ; CHECK-NEXT: mv s1, a3
49
+ ; CHECK-NEXT: mv s2, a2
50
+ ; CHECK-NEXT: beqz a3, .LBB2_3
51
+ ; CHECK-NEXT: # %bb.1: # %entry
52
+ ; CHECK-NEXT: slti a1, s1, 0
53
+ ; CHECK-NEXT: beqz a1, .LBB2_4
54
+ ; CHECK-NEXT: .LBB2_2:
55
+ ; CHECK-NEXT: fcvt.d.w s4, zero
56
+ ; CHECK-NEXT: j .LBB2_6
57
+ ; CHECK-NEXT: .LBB2_3:
58
+ ; CHECK-NEXT: seqz a1, s2
59
+ ; CHECK-NEXT: bnez a1, .LBB2_2
60
+ ; CHECK-NEXT: .LBB2_4: # %for.body.lr.ph
61
+ ; CHECK-NEXT: li s3, 0
62
+ ; CHECK-NEXT: li s6, 0
63
+ ; CHECK-NEXT: slli a0, a0, 4
64
+ ; CHECK-NEXT: add a0, s0, a0
65
+ ; CHECK-NEXT: addi s7, a0, 8
66
+ ; CHECK-NEXT: fcvt.d.w s4, zero
67
+ ; CHECK-NEXT: .LBB2_5: # %for.body
68
+ ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
69
+ ; CHECK-NEXT: mv a0, s0
70
+ ; CHECK-NEXT: call f
71
+ ; CHECK-NEXT: ld a0, 0(s7)
72
+ ; CHECK-NEXT: addi s3, s3, 1
73
+ ; CHECK-NEXT: seqz a2, s3
74
+ ; CHECK-NEXT: add s6, s6, a2
75
+ ; CHECK-NEXT: xor a2, s3, s2
76
+ ; CHECK-NEXT: xor a3, s6, s1
77
+ ; CHECK-NEXT: or a2, a2, a3
78
+ ; CHECK-NEXT: fadd.d s4, a0, s4
79
+ ; CHECK-NEXT: bnez a2, .LBB2_5
80
+ ; CHECK-NEXT: .LBB2_6: # %for.cond.cleanup
81
+ ; CHECK-NEXT: mv a0, s4
82
+ ; CHECK-NEXT: mv a1, s5
83
+ ; CHECK-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
84
+ ; CHECK-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
85
+ ; CHECK-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
86
+ ; CHECK-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
87
+ ; CHECK-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
88
+ ; CHECK-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
89
+ ; CHECK-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
90
+ ; CHECK-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
91
+ ; CHECK-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
92
+ ; CHECK-NEXT: addi sp, sp, 48
93
+ ; CHECK-NEXT: ret
94
; Guard: the loop is entered only when %n > 0 (signed compare).
+ entry:
95
+ %cmp4 = icmp sgt i64 %n , 0
96
+ br i1 %cmp4 , label %for.body.lr.ph , label %for.cond.cleanup
97
+
98
; Preheader: compute the address of field 1 of %a[%k] once, outside the loop.
+ for.body.lr.ph: ; preds = %entry
99
+ %y = getelementptr inbounds %struct.S , ptr %a , i64 %k , i32 1
100
+ br label %for.body
101
+
102
; Exit: 0.0 if the loop never ran, otherwise the final running sum.
+ for.cond.cleanup: ; preds = %for.body, %entry
103
+ %s.0.lcssa = phi double [ 0.0 , %entry ], [ %add , %for.body ]
104
+ ret double %s.0.lcssa
105
+
106
; Body: %i.06 counts iterations from 0, %s.05 carries the running sum. The
; load of %y comes after the call to @f in every iteration.
+ for.body: ; preds = %for.body.lr.ph, %for.body
107
+ %i.06 = phi i64 [ 0 , %for.body.lr.ph ], [ %inc , %for.body ]
108
+ %s.05 = phi double [ 0.0 , %for.body.lr.ph ], [ %add , %for.body ]
109
+ call void @f (ptr %a )
110
+ %0 = load double , ptr %y , align 8
111
+ %add = fadd double %0 , %s.05
112
+ %inc = add nuw nsw i64 %i.06 , 1
113
+ %exitcond.not = icmp eq i64 %inc , %n
114
+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
115
+ }
116
+
117
; External function taking one pointer and returning nothing; no body is
; defined here. fold_addi_from_different_bb calls it once per loop iteration.
+ declare void @f (ptr )
118
+
119
; split_offset(dest, x): stores %x into four consecutive double slots of
; %dest, at element indices 256, 257, 258 and 259.
;
; In the expected output the first store does not use the element's offset
; from %dest as a single immediate: the base is first advanced by 2047 and the
; store uses offset 1; the remaining stores go through a second base
; (a0 + 1) with offsets 8, 16 and 24. The value arrives in a1/a2 and is moved
; to a2/a3 before being stored.
;
; NOTE(review): presumably the split exists because the first slot's byte
; offset does not fit the store instruction's immediate field -- confirm
; against the target's encoding rules. The RUN lines are outside this view.
+ define void @split_offset (ptr %dest , double %x ) {
120
+ ; CHECK-LABEL: split_offset:
121
+ ; CHECK: # %bb.0:
122
+ ; CHECK-NEXT: mv a3, a2
123
+ ; CHECK-NEXT: addi a0, a0, 2047
124
+ ; CHECK-NEXT: mv a2, a1
125
+ ; CHECK-NEXT: addi a1, a0, 1
126
+ ; CHECK-NEXT: sd a2, 1(a0)
127
+ ; CHECK-NEXT: sd a2, 8(a1)
128
+ ; CHECK-NEXT: sd a2, 16(a1)
129
+ ; CHECK-NEXT: sd a2, 24(a1)
130
+ ; CHECK-NEXT: ret
131
; Four stores of the same value to adjacent double-sized slots.
+ %p1 = getelementptr double , ptr %dest , i32 256
132
+ store double %x , ptr %p1
133
+ %p2 = getelementptr double , ptr %dest , i32 257
134
+ store double %x , ptr %p2
135
+ %p3 = getelementptr double , ptr %dest , i32 258
136
+ store double %x , ptr %p3
137
+ %p4 = getelementptr double , ptr %dest , i32 259
138
+ store double %x , ptr %p4
139
+ ret void
140
+ }
0 commit comments