@@ -13,12 +13,8 @@ subroutine gemm_mnk(C, A, B, M, K, N) BIND(C, name="gemm_mnk")
13
13
real (C_double), dimension (K, N), intent (in ) :: B
14
14
integer (C_long) :: mm, kk, nn
15
15
C = 0.0
16
- do concurrent(mm = 1 :M)
17
- do concurrent(nn = 1 :N)
18
- do concurrent(kk = 1 :K)
19
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
20
- end do
21
- end do
16
+ do concurrent(mm = 1 :M, nn = 1 :N, kk = 1 :K)
17
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
22
18
end do
23
19
end subroutine gemm_mnk
24
20
subroutine gemm_mkn (C , A , B , M , K , N ) BIND(C, name= " gemm_mkn" )
@@ -28,12 +24,8 @@ subroutine gemm_mkn(C, A, B, M, K, N) BIND(C, name="gemm_mkn")
28
24
real (C_double), dimension (K, N), intent (in ) :: B
29
25
integer (C_long) :: mm, kk, nn
30
26
C = 0.0
31
- do concurrent(mm = 1 :M)
32
- do concurrent(kk = 1 :K)
33
- do concurrent(nn = 1 :N)
34
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
35
- end do
36
- end do
27
+ do concurrent(mm = 1 :M, kk = 1 :K, nn = 1 :N)
28
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
37
29
end do
38
30
end subroutine gemm_mkn
39
31
subroutine gemm_nmk (C , A , B , M , K , N ) BIND(C, name= " gemm_nmk" )
@@ -43,12 +35,8 @@ subroutine gemm_nmk(C, A, B, M, K, N) BIND(C, name="gemm_nmk")
43
35
real (C_double), dimension (K, N), intent (in ) :: B
44
36
integer (C_long) :: mm, kk, nn
45
37
C = 0.0
46
- do concurrent(nn = 1 :N)
47
- do concurrent(mm = 1 :M)
48
- do concurrent(kk = 1 :K)
49
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
50
- end do
51
- end do
38
+ do concurrent(nn = 1 :N, mm = 1 :M, kk = 1 :K)
39
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
52
40
end do
53
41
end subroutine gemm_nmk
54
42
subroutine gemm_nkm (C , A , B , M , K , N ) BIND(C, name= " gemm_nkm" )
@@ -58,12 +46,8 @@ subroutine gemm_nkm(C, A, B, M, K, N) BIND(C, name="gemm_nkm")
58
46
real (C_double), dimension (K, N), intent (in ) :: B
59
47
integer (C_long) :: mm, kk, nn
60
48
C = 0.0
61
- do concurrent(kk = 1 :K)
62
- do concurrent(nn = 1 :N)
63
- do concurrent(mm = 1 :M)
64
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
65
- end do
66
- end do
49
+ do concurrent(kk = 1 :K, nn = 1 :N, mm = 1 :M)
50
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
67
51
end do
68
52
end subroutine gemm_nkm
69
53
subroutine gemm_kmn (C , A , B , M , K , N ) BIND(C, name= " gemm_kmn" )
@@ -73,12 +57,8 @@ subroutine gemm_kmn(C, A, B, M, K, N) BIND(C, name="gemm_kmn")
73
57
real (C_double), dimension (K, N), intent (in ) :: B
74
58
integer (C_long) :: mm, kk, nn
75
59
C = 0.0
76
- do concurrent(kk = 1 :K)
77
- do concurrent(mm = 1 :M)
78
- do concurrent(nn = 1 :N)
79
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
80
- end do
81
- end do
60
+ do concurrent(kk = 1 :K, mm = 1 :M, nn = 1 :N)
61
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
82
62
end do
83
63
end subroutine gemm_kmn
84
64
subroutine gemm_knm (C , A , B , M , K , N ) BIND(C, name= " gemm_knm" )
@@ -88,12 +68,8 @@ subroutine gemm_knm(C, A, B, M, K, N) BIND(C, name="gemm_knm")
88
68
real (C_double), dimension (K, N), intent (in ) :: B
89
69
integer (C_long) :: mm, kk, nn
90
70
C = 0.0
91
- do concurrent(kk = 1 :K)
92
- do concurrent(nn = 1 :N)
93
- do concurrent(mm = 1 :M)
94
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
95
- end do
96
- end do
71
+ do concurrent(kk = 1 :K, nn = 1 :N, mm = 1 :M)
72
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(kk,nn)
97
73
end do
98
74
end subroutine gemm_knm
99
75
subroutine gemmbuiltin (C , A , B , M , K , N ) BIND(C, name= " gemmbuiltin" )
@@ -110,12 +86,8 @@ subroutine AtmulB(C, A, B, M, K, N) BIND(C, name="AtmulB")
110
86
real (C_double), dimension (K, N), intent (in ) :: B
111
87
integer (C_long) :: mm, kk, nn
112
88
C = 0.0
113
- do concurrent(nn = 1 :N)
114
- do concurrent(mm = 1 :M)
115
- do concurrent(kk = 1 :K)
116
- C(mm,nn) = C(mm,nn) + A(kk,mm) * B(kk,nn)
117
- end do
118
- end do
89
+ do concurrent(nn = 1 :N, mm = 1 :M, kk = 1 :K)
90
+ C(mm,nn) = C(mm,nn) + A(kk,mm) * B(kk,nn)
119
91
end do
120
92
end subroutine AtmulB
121
93
subroutine AtmulBbuiltin (C , A , B , M , K , N ) BIND(C, name= " AtmulBbuiltin" )
@@ -132,12 +104,8 @@ subroutine AmulBt(C, A, B, M, K, N) BIND(C, name="AmulBt")
132
104
real (C_double), dimension (N, K), intent (in ) :: B
133
105
integer (C_long) :: mm, kk, nn
134
106
C = 0.0
135
- do concurrent(kk = 1 :K)
136
- do concurrent(nn = 1 :N)
137
- do concurrent(mm = 1 :M)
138
- C(mm,nn) = C(mm,nn) + A(mm,kk) * B(nn,kk)
139
- end do
140
- end do
107
+ do concurrent(kk = 1 :K, nn = 1 :N, mm = 1 :M)
108
+ C(mm,nn) = C(mm,nn) + A(mm,kk) * B(nn,kk)
141
109
end do
142
110
end subroutine AmulBt
143
111
subroutine AmulBtbuiltin (C , A , B , M , K , N ) BIND(C, name= " AmulBtbuiltin" )
@@ -154,12 +122,8 @@ subroutine AtmulBt(C, A, B, M, K, N) BIND(C, name="AtmulBt")
154
122
real (C_double), dimension (N, K), intent (in ) :: B
155
123
integer (C_long) :: mm, kk, nn
156
124
C = 0.0
157
- do concurrent(nn = 1 :N)
158
- do concurrent(kk = 1 :K)
159
- do concurrent(mm = 1 :M)
160
- C(mm,nn) = C(mm,nn) + A(kk,mm) * B(nn,kk)
161
- end do
162
- end do
125
+ do concurrent(nn = 1 :N, kk = 1 :K, mm = 1 :M)
126
+ C(mm,nn) = C(mm,nn) + A(kk,mm) * B(nn,kk)
163
127
end do
164
128
end subroutine AtmulBt
165
129
subroutine AtmulBtbuiltin (C , A , B , M , K , N ) BIND(C, name= " AtmulBtbuiltin" )
@@ -194,10 +158,8 @@ subroutine dot3(s, x, A, y, M, N) BIND(C, name="dot3")
194
158
real (C_double), intent (in ) :: x(M), A(M,N), y(N)
195
159
real (C_double), intent (out ) :: s
196
160
integer (C_long) :: mm, nn
197
- do concurrent(nn = 1 :N)
198
- do concurrent(mm = 1 :M)
199
- s = s + x(mm) * A(mm, nn) * y(nn)
200
- end do
161
+ do concurrent(nn = 1 :N, mm = 1 :M)
162
+ s = s + x(mm) * A(mm, nn) * y(nn)
201
163
end do
202
164
end subroutine dot3
203
165
! GCC$ builtin (exp) attributes simd (notinbranch) if('x86_64')
@@ -226,10 +188,8 @@ subroutine gemv(y, A, x, M, K) BIND(C, name="gemv")
226
188
real (C_double), dimension (M), intent (out ) :: y
227
189
integer (C_long) :: mm, kk
228
190
y = 0.0
229
- do concurrent(kk = 1 :K)
230
- do concurrent(mm = 1 :M)
231
- y(mm) = y(mm) + A(mm,kk) * x(kk)
232
- end do
191
+ do concurrent(kk = 1 :K, mm = 1 :M)
192
+ y(mm) = y(mm) + A(mm,kk) * x(kk)
233
193
end do
234
194
end subroutine gemv
235
195
subroutine gemvbuiltin (y , A , x , M , K ) BIND(C, name= " gemvbuiltin" )
@@ -244,12 +204,9 @@ subroutine Atmulvb(y, A, x, M, K) BIND(C, name="Atmulvb")
244
204
real (C_double), dimension (M), intent (out ) :: y
245
205
integer (C_long) :: mm, kk
246
206
real (C_double) :: ymm
247
- do concurrent(mm = 1 :M)
248
- ymm = 0
249
- do concurrent(kk = 1 :K)
250
- ymm = ymm + A(kk,mm) * x(kk)
251
- end do
252
- y(mm) = ymm
207
+ y = 0
208
+ do concurrent(mm = 1 :M, kk = 1 :K)
209
+ y(mm) = y(mm) + A(kk,mm) * x(kk)
253
210
end do
254
211
end subroutine Atmulvb
255
212
subroutine Atmulvbbuiltin (y , A , x , M , K ) BIND(C, name= " Atmulvbbuiltin" )
@@ -266,22 +223,18 @@ subroutine unscaledvar(s, A, x, M, N) BIND(C, name="unscaledvar")
266
223
integer (C_long) :: mm, nn
267
224
real (C_double) :: d
268
225
s = 0.0
269
- do concurrent(nn = 1 :N)
270
- do concurrent(mm = 1 :M)
271
- d = A(mm,nn) - x(mm)
272
- s(mm) = s(mm) + d * d
273
- end do
226
+ do concurrent(nn = 1 :N, mm = 1 :M)
227
+ d = A(mm,nn) - x(mm)
228
+ s(mm) = s(mm) + d * d
274
229
end do
275
230
end subroutine unscaledvar
276
231
subroutine aplusBc (D , a , B , c , M , N ) BIND(C, name= " aplusBc" )
277
232
integer (C_long), intent (in ) :: M, N
278
233
real (C_double), intent (in ) :: a(M), B(M,N), c(N)
279
234
real (C_double), dimension (M,N), intent (out ) :: D
280
235
integer (C_long) :: mm, nn
281
- do concurrent(nn = 1 :N)
282
- do concurrent(mm = 1 :M)
283
- D(mm,nn) = a(mm) + B(mm,nn) * c(nn)
284
- end do
236
+ do concurrent(nn = 1 :N, mm = 1 :M)
237
+ D(mm,nn) = a(mm) + B(mm,nn) * c(nn)
285
238
end do
286
239
end subroutine aplusBc
287
240
subroutine OLSlp (lp , y , X , b , N , P ) BIND(C, name= " OLSlp" )
@@ -299,15 +252,25 @@ subroutine OLSlp(lp, y, X, b, N, P) BIND(C, name="OLSlp")
299
252
lp = lp + d* d
300
253
end do
301
254
end subroutine OLSlp
255
+ subroutine OLSlpsplit (lp , y , X , b , N , P ) BIND(C, name= " OLSlpsplit" )
256
+ integer (C_long), intent (in ) :: N, P
257
+ real (C_double), intent (in ) :: y(N), X(N, P), b(P)
258
+ real (C_double), intent (out ) :: lp
259
+ integer (C_long) :: nn, pp
260
+ real (C_double) :: d(N)
261
+ d = y
262
+ do concurrent(nn = 1 :N, pp = 1 :P)
263
+ d(nn) = d(nn) - X(nn,pp) * b(pp)
264
+ end do
265
+ lp = dot_product (d, d)
266
+ end subroutine OLSlpsplit
302
267
subroutine AplusAt (B , A , N ) BIND(C, name= " AplusAt" )
303
268
integer (C_long), intent (in ) :: N
304
269
real (C_double), dimension (N,N), intent (out ) :: B
305
270
real (C_double), dimension (N,N), intent (in ) :: A
306
271
integer (C_long) :: i, j
307
- do concurrent(i = 1 :N)
308
- do concurrent(j = 1 :N)
309
- B(j,i) = A(j,i) + A(i,j)
310
- end do
272
+ do concurrent(i = 1 :N, j = 1 :N)
273
+ B(j,i) = A(j,i) + A(i,j)
311
274
end do
312
275
end subroutine AplusAt
313
276
subroutine AplusAtbuiltin (B , A , N ) BIND(C, name= " AplusAtbuiltin" )
0 commit comments