@@ -45,62 +45,62 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
45
45
;
46
46
; CHECKPTX71-LABEL: test(
47
47
; CHECKPTX71: {
48
- ; CHECKPTX71-NEXT: .reg .pred %p<5>;
49
- ; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
50
- ; CHECKPTX71-NEXT: .reg .b32 %r<4>;
51
- ; CHECKPTX71-NEXT: .reg .f32 %f<12>;
48
+ ; CHECKPTX71-NEXT: .reg .pred %p<5>;
49
+ ; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
50
+ ; CHECKPTX71-NEXT: .reg .b32 %r<4>;
51
+ ; CHECKPTX71-NEXT: .reg .f32 %f<12>;
52
52
; CHECKPTX71-EMPTY:
53
53
; CHECKPTX71-NEXT: // %bb.0:
54
- ; CHECKPTX71-NEXT: ld.param.b16 %rs13, [test_param_3];
55
- ; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
56
- ; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
57
- ; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
58
- ; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
59
- ; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
60
- ; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start
54
+ ; CHECKPTX71-NEXT: ld.param.b16 %rs13, [test_param_3];
55
+ ; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
56
+ ; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
57
+ ; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
58
+ ; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
59
+ ; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
60
+ ; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start14
61
61
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
62
- ; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
63
- ; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
64
- ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
65
- ; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
66
- ; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
67
- ; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
68
- ; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
69
- ; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end
70
- ; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
71
- ; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start2
62
+ ; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
63
+ ; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
64
+ ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
65
+ ; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
66
+ ; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
67
+ ; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
68
+ ; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
69
+ ; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end13
70
+ ; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
71
+ ; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start8
72
72
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
73
- ; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
74
- ; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
75
- ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
76
- ; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
77
- ; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
78
- ; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
79
- ; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
80
- ; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end1
81
- ; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
82
- ; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start8
73
+ ; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
74
+ ; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
75
+ ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
76
+ ; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
77
+ ; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
78
+ ; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
79
+ ; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
80
+ ; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end7
81
+ ; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
82
+ ; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start2
83
83
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
84
- ; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
85
- ; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
86
- ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
87
- ; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
88
- ; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
89
- ; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
90
- ; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
91
- ; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end7
92
- ; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
93
- ; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start14
84
+ ; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
85
+ ; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
86
+ ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
87
+ ; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
88
+ ; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
89
+ ; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
90
+ ; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
91
+ ; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end1
92
+ ; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
93
+ ; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start
94
94
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
95
- ; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
96
- ; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
97
- ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
98
- ; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
99
- ; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
100
- ; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
101
- ; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
102
- ; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end13
103
- ; CHECKPTX71-NEXT: ret;
95
+ ; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
96
+ ; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
97
+ ; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
98
+ ; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
99
+ ; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
100
+ ; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
101
+ ; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
102
+ ; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
103
+ ; CHECKPTX71-NEXT: ret;
104
104
%r1 = atomicrmw fadd ptr %dp0 , bfloat %val seq_cst
105
105
%r2 = atomicrmw fadd ptr %dp0 , bfloat 1 .0 seq_cst
106
106
%r3 = atomicrmw fadd ptr addrspace (1 ) %dp1 , bfloat %val seq_cst
0 commit comments