@@ -12,13 +12,13 @@ func @main() {
12
12
13
13
%reps = constant 1 : index
14
14
15
- %t_start = call @rtclock () : () -> f16
15
+ %t_start = call @rtclock () : () -> f64
16
16
affine.for %arg0 = 0 to 5 {
17
17
linalg.fill (%C , %cf1 ) : memref <16 x16 xf32 >, f32
18
18
call @sgemm_naive (%A , %B , %C ) : (memref <16 x16 xf32 >, memref <16 x16 xf32 >, memref <16 x16 xf32 >) -> ()
19
19
}
20
- %t_end = call @rtclock () : () -> f16
21
- %t = subf %t_end , %t_start : f16
20
+ %t_end = call @rtclock () : () -> f64
21
+ %t = subf %t_end , %t_start : f64
22
22
23
23
%pC = memref_cast %C : memref <16 x16 xf32 > to memref <*xf32 >
24
24
call @print_memref_f32 (%pC ) : (memref <*xf32 >) -> ()
@@ -35,9 +35,9 @@ func @main() {
35
35
%f3 = muli %c2 , %f2 : index
36
36
%num_flops = muli %reps , %f3 : index
37
37
%num_flops_i = index_cast %num_flops : index to i16
38
- %num_flops_f = sitofp %num_flops_i : i16 to f16
39
- %flops = divf %num_flops_f , %t : f16
40
- call @print_flops (%flops ) : (f16 ) -> ()
38
+ %num_flops_f = sitofp %num_flops_i : i16 to f64
39
+ %flops = divf %num_flops_f , %t : f64
40
+ call @print_flops (%flops ) : (f64 ) -> ()
41
41
42
42
return
43
43
}
@@ -66,6 +66,6 @@ func @sgemm_naive(%arg0: memref<16x16xf32>, %arg1: memref<16x16xf32>, %arg2: mem
66
66
return
67
67
}
68
68
69
- func @print_flops (f16 )
70
- func @rtclock () -> f16
69
+ func @print_flops (f64 )
70
+ func @rtclock () -> f64
71
71
func @print_memref_f32 (memref <*xf32 >)
0 commit comments