11
11
; This is explained (with the motivation for such an optimization) in
12
12
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
13
13
14
- define i8 @add8 (ptr %p ) {
14
+ define i8 @add8 (ptr %p ) # 0 {
15
15
; X64-LABEL: add8:
16
16
; X64: # %bb.0:
17
17
; X64-NEXT: #MEMBARRIER
@@ -39,7 +39,7 @@ define i8 @add8(ptr %p) {
39
39
ret i8 %1
40
40
}
41
41
42
- define i16 @or16 (ptr %p ) {
42
+ define i16 @or16 (ptr %p ) # 0 {
43
43
; X64-LABEL: or16:
44
44
; X64: # %bb.0:
45
45
; X64-NEXT: #MEMBARRIER
@@ -67,7 +67,7 @@ define i16 @or16(ptr %p) {
67
67
ret i16 %1
68
68
}
69
69
70
- define i32 @xor32 (ptr %p ) {
70
+ define i32 @xor32 (ptr %p ) # 0 {
71
71
; X64-LABEL: xor32:
72
72
; X64: # %bb.0:
73
73
; X64-NEXT: #MEMBARRIER
@@ -95,7 +95,7 @@ define i32 @xor32(ptr %p) {
95
95
ret i32 %1
96
96
}
97
97
98
- define i64 @sub64 (ptr %p ) {
98
+ define i64 @sub64 (ptr %p ) # 0 {
99
99
; X64-LABEL: sub64:
100
100
; X64: # %bb.0:
101
101
; X64-NEXT: #MEMBARRIER
@@ -105,11 +105,7 @@ define i64 @sub64(ptr %p) {
105
105
; X86-LABEL: sub64:
106
106
; X86: # %bb.0:
107
107
; X86-NEXT: pushl %ebx
108
- ; X86-NEXT: .cfi_def_cfa_offset 8
109
108
; X86-NEXT: pushl %esi
110
- ; X86-NEXT: .cfi_def_cfa_offset 12
111
- ; X86-NEXT: .cfi_offset %esi, -12
112
- ; X86-NEXT: .cfi_offset %ebx, -8
113
109
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
114
110
; X86-NEXT: movl (%esi), %eax
115
111
; X86-NEXT: movl 4(%esi), %edx
@@ -122,42 +118,32 @@ define i64 @sub64(ptr %p) {
122
118
; X86-NEXT: jne .LBB3_1
123
119
; X86-NEXT: # %bb.2: # %atomicrmw.end
124
120
; X86-NEXT: popl %esi
125
- ; X86-NEXT: .cfi_def_cfa_offset 8
126
121
; X86-NEXT: popl %ebx
127
- ; X86-NEXT: .cfi_def_cfa_offset 4
128
122
; X86-NEXT: retl
129
123
%1 = atomicrmw sub ptr %p , i64 0 syncscope("singlethread" ) seq_cst
130
124
ret i64 %1
131
125
}
132
126
133
- define i128 @or128 (ptr %p ) {
127
+ define i128 @or128 (ptr %p ) # 0 {
134
128
; X64-LABEL: or128:
135
129
; X64: # %bb.0:
136
130
; X64-NEXT: pushq %rax
137
- ; X64-NEXT: .cfi_def_cfa_offset 16
138
131
; X64-NEXT: xorl %esi, %esi
139
132
; X64-NEXT: xorl %edx, %edx
140
133
; X64-NEXT: xorl %ecx, %ecx
141
134
; X64-NEXT: callq __atomic_fetch_or_16@PLT
142
135
; X64-NEXT: popq %rcx
143
- ; X64-NEXT: .cfi_def_cfa_offset 8
144
136
; X64-NEXT: retq
145
137
;
146
138
; X86-GENERIC-LABEL: or128:
147
139
; X86-GENERIC: # %bb.0:
148
140
; X86-GENERIC-NEXT: pushl %ebp
149
- ; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
150
- ; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
151
141
; X86-GENERIC-NEXT: movl %esp, %ebp
152
- ; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
153
142
; X86-GENERIC-NEXT: pushl %ebx
154
143
; X86-GENERIC-NEXT: pushl %edi
155
144
; X86-GENERIC-NEXT: pushl %esi
156
145
; X86-GENERIC-NEXT: andl $-16, %esp
157
146
; X86-GENERIC-NEXT: subl $48, %esp
158
- ; X86-GENERIC-NEXT: .cfi_offset %esi, -20
159
- ; X86-GENERIC-NEXT: .cfi_offset %edi, -16
160
- ; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
161
147
; X86-GENERIC-NEXT: movl 12(%ebp), %edi
162
148
; X86-GENERIC-NEXT: movl 12(%edi), %ecx
163
149
; X86-GENERIC-NEXT: movl 8(%edi), %edx
@@ -201,24 +187,17 @@ define i128 @or128(ptr %p) {
201
187
; X86-GENERIC-NEXT: popl %edi
202
188
; X86-GENERIC-NEXT: popl %ebx
203
189
; X86-GENERIC-NEXT: popl %ebp
204
- ; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
205
190
; X86-GENERIC-NEXT: retl $4
206
191
;
207
192
; X86-ATOM-LABEL: or128:
208
193
; X86-ATOM: # %bb.0:
209
194
; X86-ATOM-NEXT: pushl %ebp
210
- ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
211
- ; X86-ATOM-NEXT: .cfi_offset %ebp, -8
212
195
; X86-ATOM-NEXT: movl %esp, %ebp
213
- ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
214
196
; X86-ATOM-NEXT: pushl %ebx
215
197
; X86-ATOM-NEXT: pushl %edi
216
198
; X86-ATOM-NEXT: pushl %esi
217
199
; X86-ATOM-NEXT: andl $-16, %esp
218
200
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
219
- ; X86-ATOM-NEXT: .cfi_offset %esi, -20
220
- ; X86-ATOM-NEXT: .cfi_offset %edi, -16
221
- ; X86-ATOM-NEXT: .cfi_offset %ebx, -12
222
201
; X86-ATOM-NEXT: movl 12(%ebp), %edi
223
202
; X86-ATOM-NEXT: movl 12(%edi), %ecx
224
203
; X86-ATOM-NEXT: movl 8(%edi), %edx
@@ -262,14 +241,13 @@ define i128 @or128(ptr %p) {
262
241
; X86-ATOM-NEXT: popl %edi
263
242
; X86-ATOM-NEXT: popl %ebx
264
243
; X86-ATOM-NEXT: popl %ebp
265
- ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
266
244
; X86-ATOM-NEXT: retl $4
267
245
%1 = atomicrmw or ptr %p , i128 0 syncscope("singlethread" ) monotonic
268
246
ret i128 %1
269
247
}
270
248
271
249
; For 'and', the idempotent value is (-1)
272
- define i32 @and32 (ptr %p ) {
250
+ define i32 @and32 (ptr %p ) # 0 {
273
251
; X64-LABEL: and32:
274
252
; X64: # %bb.0:
275
253
; X64-NEXT: #MEMBARRIER
@@ -297,7 +275,7 @@ define i32 @and32 (ptr %p) {
297
275
ret i32 %1
298
276
}
299
277
300
- define void @or32_nouse_monotonic (ptr %p ) {
278
+ define void @or32_nouse_monotonic (ptr %p ) # 0 {
301
279
; X64-LABEL: or32_nouse_monotonic:
302
280
; X64: # %bb.0:
303
281
; X64-NEXT: #MEMBARRIER
@@ -325,7 +303,7 @@ define void @or32_nouse_monotonic(ptr %p) {
325
303
}
326
304
327
305
328
- define void @or32_nouse_acquire (ptr %p ) {
306
+ define void @or32_nouse_acquire (ptr %p ) # 0 {
329
307
; X64-LABEL: or32_nouse_acquire:
330
308
; X64: # %bb.0:
331
309
; X64-NEXT: #MEMBARRIER
@@ -352,7 +330,7 @@ define void @or32_nouse_acquire(ptr %p) {
352
330
ret void
353
331
}
354
332
355
- define void @or32_nouse_release (ptr %p ) {
333
+ define void @or32_nouse_release (ptr %p ) # 0 {
356
334
; X64-LABEL: or32_nouse_release:
357
335
; X64: # %bb.0:
358
336
; X64-NEXT: #MEMBARRIER
@@ -379,7 +357,7 @@ define void @or32_nouse_release(ptr %p) {
379
357
ret void
380
358
}
381
359
382
- define void @or32_nouse_acq_rel (ptr %p ) {
360
+ define void @or32_nouse_acq_rel (ptr %p ) # 0 {
383
361
; X64-LABEL: or32_nouse_acq_rel:
384
362
; X64: # %bb.0:
385
363
; X64-NEXT: #MEMBARRIER
@@ -406,7 +384,7 @@ define void @or32_nouse_acq_rel(ptr %p) {
406
384
ret void
407
385
}
408
386
409
- define void @or32_nouse_seq_cst (ptr %p ) {
387
+ define void @or32_nouse_seq_cst (ptr %p ) # 0 {
410
388
; X64-LABEL: or32_nouse_seq_cst:
411
389
; X64: # %bb.0:
412
390
; X64-NEXT: #MEMBARRIER
@@ -434,7 +412,7 @@ define void @or32_nouse_seq_cst(ptr %p) {
434
412
}
435
413
436
414
; TODO: The result isn't used, so the cmpxchg8b loop emitted on 32-bit targets is unneeded
437
- define void @or64_nouse_seq_cst (ptr %p ) {
415
+ define void @or64_nouse_seq_cst (ptr %p ) # 0 {
438
416
; X64-LABEL: or64_nouse_seq_cst:
439
417
; X64: # %bb.0:
440
418
; X64-NEXT: #MEMBARRIER
@@ -443,11 +421,7 @@ define void @or64_nouse_seq_cst(ptr %p) {
443
421
; X86-LABEL: or64_nouse_seq_cst:
444
422
; X86: # %bb.0:
445
423
; X86-NEXT: pushl %ebx
446
- ; X86-NEXT: .cfi_def_cfa_offset 8
447
424
; X86-NEXT: pushl %esi
448
- ; X86-NEXT: .cfi_def_cfa_offset 12
449
- ; X86-NEXT: .cfi_offset %esi, -12
450
- ; X86-NEXT: .cfi_offset %ebx, -8
451
425
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
452
426
; X86-NEXT: movl (%esi), %eax
453
427
; X86-NEXT: movl 4(%esi), %edx
@@ -460,43 +434,33 @@ define void @or64_nouse_seq_cst(ptr %p) {
460
434
; X86-NEXT: jne .LBB11_1
461
435
; X86-NEXT: # %bb.2: # %atomicrmw.end
462
436
; X86-NEXT: popl %esi
463
- ; X86-NEXT: .cfi_def_cfa_offset 8
464
437
; X86-NEXT: popl %ebx
465
- ; X86-NEXT: .cfi_def_cfa_offset 4
466
438
; X86-NEXT: retl
467
439
atomicrmw or ptr %p , i64 0 syncscope("singlethread" ) seq_cst
468
440
ret void
469
441
}
470
442
471
443
; TODO: The result isn't used, so this doesn't need to be lowered as a libcall (X64 currently calls __atomic_fetch_or_16)
472
- define void @or128_nouse_seq_cst (ptr %p ) {
444
+ define void @or128_nouse_seq_cst (ptr %p ) # 0 {
473
445
; X64-LABEL: or128_nouse_seq_cst:
474
446
; X64: # %bb.0:
475
447
; X64-NEXT: pushq %rax
476
- ; X64-NEXT: .cfi_def_cfa_offset 16
477
448
; X64-NEXT: xorl %esi, %esi
478
449
; X64-NEXT: xorl %edx, %edx
479
450
; X64-NEXT: movl $5, %ecx
480
451
; X64-NEXT: callq __atomic_fetch_or_16@PLT
481
452
; X64-NEXT: popq %rax
482
- ; X64-NEXT: .cfi_def_cfa_offset 8
483
453
; X64-NEXT: retq
484
454
;
485
455
; X86-GENERIC-LABEL: or128_nouse_seq_cst:
486
456
; X86-GENERIC: # %bb.0:
487
457
; X86-GENERIC-NEXT: pushl %ebp
488
- ; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
489
- ; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
490
458
; X86-GENERIC-NEXT: movl %esp, %ebp
491
- ; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
492
459
; X86-GENERIC-NEXT: pushl %ebx
493
460
; X86-GENERIC-NEXT: pushl %edi
494
461
; X86-GENERIC-NEXT: pushl %esi
495
462
; X86-GENERIC-NEXT: andl $-16, %esp
496
463
; X86-GENERIC-NEXT: subl $48, %esp
497
- ; X86-GENERIC-NEXT: .cfi_offset %esi, -20
498
- ; X86-GENERIC-NEXT: .cfi_offset %edi, -16
499
- ; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
500
464
; X86-GENERIC-NEXT: movl 8(%ebp), %esi
501
465
; X86-GENERIC-NEXT: movl 12(%esi), %ecx
502
466
; X86-GENERIC-NEXT: movl 8(%esi), %edi
@@ -535,24 +499,17 @@ define void @or128_nouse_seq_cst(ptr %p) {
535
499
; X86-GENERIC-NEXT: popl %edi
536
500
; X86-GENERIC-NEXT: popl %ebx
537
501
; X86-GENERIC-NEXT: popl %ebp
538
- ; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
539
502
; X86-GENERIC-NEXT: retl
540
503
;
541
504
; X86-ATOM-LABEL: or128_nouse_seq_cst:
542
505
; X86-ATOM: # %bb.0:
543
506
; X86-ATOM-NEXT: pushl %ebp
544
- ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
545
- ; X86-ATOM-NEXT: .cfi_offset %ebp, -8
546
507
; X86-ATOM-NEXT: movl %esp, %ebp
547
- ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
548
508
; X86-ATOM-NEXT: pushl %ebx
549
509
; X86-ATOM-NEXT: pushl %edi
550
510
; X86-ATOM-NEXT: pushl %esi
551
511
; X86-ATOM-NEXT: andl $-16, %esp
552
512
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
553
- ; X86-ATOM-NEXT: .cfi_offset %esi, -20
554
- ; X86-ATOM-NEXT: .cfi_offset %edi, -16
555
- ; X86-ATOM-NEXT: .cfi_offset %ebx, -12
556
513
; X86-ATOM-NEXT: movl 8(%ebp), %esi
557
514
; X86-ATOM-NEXT: movl %esp, %ebx
558
515
; X86-ATOM-NEXT: movl 12(%esi), %ecx
@@ -591,14 +548,13 @@ define void @or128_nouse_seq_cst(ptr %p) {
591
548
; X86-ATOM-NEXT: popl %edi
592
549
; X86-ATOM-NEXT: popl %ebx
593
550
; X86-ATOM-NEXT: popl %ebp
594
- ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
595
551
; X86-ATOM-NEXT: retl
596
552
atomicrmw or ptr %p , i128 0 syncscope("singlethread" ) seq_cst
597
553
ret void
598
554
}
599
555
600
556
601
- define void @or16_nouse_seq_cst (ptr %p ) {
557
+ define void @or16_nouse_seq_cst (ptr %p ) # 0 {
602
558
; X64-LABEL: or16_nouse_seq_cst:
603
559
; X64: # %bb.0:
604
560
; X64-NEXT: #MEMBARRIER
@@ -625,7 +581,7 @@ define void @or16_nouse_seq_cst(ptr %p) {
625
581
ret void
626
582
}
627
583
628
- define void @or8_nouse_seq_cst (ptr %p ) {
584
+ define void @or8_nouse_seq_cst (ptr %p ) # 0 {
629
585
; X64-LABEL: or8_nouse_seq_cst:
630
586
; X64: # %bb.0:
631
587
; X64-NEXT: #MEMBARRIER
@@ -651,6 +607,9 @@ define void @or8_nouse_seq_cst(ptr %p) {
651
607
atomicrmw or ptr %p , i8 0 syncscope("singlethread" ) seq_cst
652
608
ret void
653
609
}
610
+
611
+ attributes #0 = { nounwind } ; attached to the test functions so the checked output carries no .cfi_* directives
612
+
654
613
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
655
614
; X86-SLM: {{.*}}
656
615
; X86-SSE2: {{.*}}
0 commit comments