@@ -269,20 +269,23 @@ define float @fmul_fma_fast2(float %x) {
269
269
270
270
; Reduced precision for sqrt is allowed - should use an estimate and Newton-Raphson (NR) iterations.
271
271
272
- ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn :'
272
+ ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
273
273
; FMFDEBUG: fmul afn {{t[0-9]+}}
274
- ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn :'
274
+ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
275
275
276
- ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn :'
276
+ ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
277
277
; GLOBALDEBUG: fmul afn {{t[0-9]+}}
278
- ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn :'
278
+ ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
279
279
280
- define float @sqrt_afn (float %x ) {
281
- ; FMF-LABEL: sqrt_afn :
280
+ define float @sqrt_afn_ieee(float %x) #0 {
281
+ ; FMF-LABEL: sqrt_afn_ieee:
282
282
; FMF: # %bb.0:
283
+ ; FMF-NEXT: addis 3, 2, .LCPI10_2@toc@ha
284
+ ; FMF-NEXT: fabs 0, 1
285
+ ; FMF-NEXT: lfs 2, .LCPI10_2@toc@l(3)
286
+ ; FMF-NEXT: fcmpu 0, 0, 2
283
287
; FMF-NEXT: xxlxor 0, 0, 0
284
- ; FMF-NEXT: fcmpu 0, 1, 0
285
- ; FMF-NEXT: beq 0, .LBB10_2
288
+ ; FMF-NEXT: blt 0, .LBB10_2
286
289
; FMF-NEXT: # %bb.1:
287
290
; FMF-NEXT: xsrsqrtesp 0, 1
288
291
; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
@@ -298,11 +301,14 @@ define float @sqrt_afn(float %x) {
298
301
; FMF-NEXT: fmr 1, 0
299
302
; FMF-NEXT: blr
300
303
;
301
- ; GLOBAL-LABEL: sqrt_afn :
304
+ ; GLOBAL-LABEL: sqrt_afn_ieee:
302
305
; GLOBAL: # %bb.0:
306
+ ; GLOBAL-NEXT: addis 3, 2, .LCPI10_2@toc@ha
307
+ ; GLOBAL-NEXT: fabs 0, 1
308
+ ; GLOBAL-NEXT: lfs 2, .LCPI10_2@toc@l(3)
309
+ ; GLOBAL-NEXT: fcmpu 0, 0, 2
303
310
; GLOBAL-NEXT: xxlxor 0, 0, 0
304
- ; GLOBAL-NEXT: fcmpu 0, 1, 0
305
- ; GLOBAL-NEXT: beq 0, .LBB10_2
311
+ ; GLOBAL-NEXT: blt 0, .LBB10_2
306
312
; GLOBAL-NEXT: # %bb.1:
307
313
; GLOBAL-NEXT: xsrsqrtesp 0, 1
308
314
; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
@@ -320,18 +326,16 @@ define float @sqrt_afn(float %x) {
320
326
ret float %rt
321
327
}
322
328
323
- ; The call is now fully 'fast'. This implies that approximation is allowed.
324
-
325
- ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
326
- ; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
327
- ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
329
+ ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
330
+ ; FMFDEBUG: fmul afn {{t[0-9]+}}
331
+ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
328
332
329
- ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast :'
330
- ; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
331
- ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast :'
333
+ ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
334
+ ; GLOBALDEBUG: fmul afn {{t[0-9]+}}
335
+ ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
332
336
333
- define float @sqrt_fast (float %x ) {
334
- ; FMF-LABEL: sqrt_fast :
337
+ define float @sqrt_afn_preserve_sign(float %x) #1 {
338
+ ; FMF-LABEL: sqrt_afn_preserve_sign:
335
339
; FMF: # %bb.0:
336
340
; FMF-NEXT: xxlxor 0, 0, 0
337
341
; FMF-NEXT: fcmpu 0, 1, 0
@@ -343,14 +347,15 @@ define float @sqrt_fast(float %x) {
343
347
; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3)
344
348
; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4)
345
349
; FMF-NEXT: xsmulsp 1, 1, 0
346
- ; FMF-NEXT: xsmaddasp 2, 1, 0
347
- ; FMF-NEXT: xsmulsp 0, 1, 3
348
- ; FMF-NEXT: xsmulsp 0, 0, 2
350
+ ; FMF-NEXT: xsmulsp 0, 1, 0
351
+ ; FMF-NEXT: xsmulsp 1, 1, 2
352
+ ; FMF-NEXT: xsaddsp 0, 0, 3
353
+ ; FMF-NEXT: xsmulsp 0, 1, 0
349
354
; FMF-NEXT: .LBB11_2:
350
355
; FMF-NEXT: fmr 1, 0
351
356
; FMF-NEXT: blr
352
357
;
353
- ; GLOBAL-LABEL: sqrt_fast :
358
+ ; GLOBAL-LABEL: sqrt_afn_preserve_sign:
354
359
; GLOBAL: # %bb.0:
355
360
; GLOBAL-NEXT: xxlxor 0, 0, 0
356
361
; GLOBAL-NEXT: fcmpu 0, 1, 0
@@ -367,6 +372,116 @@ define float @sqrt_fast(float %x) {
367
372
; GLOBAL-NEXT: xsmulsp 0, 0, 2
368
373
; GLOBAL-NEXT: .LBB11_2:
369
374
; GLOBAL-NEXT: fmr 1, 0
375
+ ; GLOBAL-NEXT: blr
376
+ %rt = call afn float @llvm.sqrt.f32 (float %x )
377
+ ret float %rt
378
+ }
379
+
380
+ ; The call is now fully 'fast'. This implies that approximation is allowed.
381
+
382
+ ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
383
+ ; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
384
+ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
385
+
386
+ ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
387
+ ; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
388
+ ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
389
+
390
+ define float @sqrt_fast_ieee (float %x ) #0 {
391
+ ; FMF-LABEL: sqrt_fast_ieee:
392
+ ; FMF: # %bb.0:
393
+ ; FMF-NEXT: addis 3, 2, .LCPI12_2@toc@ha
394
+ ; FMF-NEXT: fabs 0, 1
395
+ ; FMF-NEXT: lfs 2, .LCPI12_2@toc@l(3)
396
+ ; FMF-NEXT: fcmpu 0, 0, 2
397
+ ; FMF-NEXT: xxlxor 0, 0, 0
398
+ ; FMF-NEXT: blt 0, .LBB12_2
399
+ ; FMF-NEXT: # %bb.1:
400
+ ; FMF-NEXT: xsrsqrtesp 0, 1
401
+ ; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha
402
+ ; FMF-NEXT: addis 4, 2, .LCPI12_1@toc@ha
403
+ ; FMF-NEXT: lfs 2, .LCPI12_0@toc@l(3)
404
+ ; FMF-NEXT: lfs 3, .LCPI12_1@toc@l(4)
405
+ ; FMF-NEXT: xsmulsp 1, 1, 0
406
+ ; FMF-NEXT: xsmaddasp 2, 1, 0
407
+ ; FMF-NEXT: xsmulsp 0, 1, 3
408
+ ; FMF-NEXT: xsmulsp 0, 0, 2
409
+ ; FMF-NEXT: .LBB12_2:
410
+ ; FMF-NEXT: fmr 1, 0
411
+ ; FMF-NEXT: blr
412
+ ;
413
+ ; GLOBAL-LABEL: sqrt_fast_ieee:
414
+ ; GLOBAL: # %bb.0:
415
+ ; GLOBAL-NEXT: addis 3, 2, .LCPI12_2@toc@ha
416
+ ; GLOBAL-NEXT: fabs 0, 1
417
+ ; GLOBAL-NEXT: lfs 2, .LCPI12_2@toc@l(3)
418
+ ; GLOBAL-NEXT: fcmpu 0, 0, 2
419
+ ; GLOBAL-NEXT: xxlxor 0, 0, 0
420
+ ; GLOBAL-NEXT: blt 0, .LBB12_2
421
+ ; GLOBAL-NEXT: # %bb.1:
422
+ ; GLOBAL-NEXT: xsrsqrtesp 0, 1
423
+ ; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha
424
+ ; GLOBAL-NEXT: addis 4, 2, .LCPI12_1@toc@ha
425
+ ; GLOBAL-NEXT: lfs 2, .LCPI12_0@toc@l(3)
426
+ ; GLOBAL-NEXT: lfs 3, .LCPI12_1@toc@l(4)
427
+ ; GLOBAL-NEXT: xsmulsp 1, 1, 0
428
+ ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
429
+ ; GLOBAL-NEXT: xsmulsp 0, 1, 3
430
+ ; GLOBAL-NEXT: xsmulsp 0, 0, 2
431
+ ; GLOBAL-NEXT: .LBB12_2:
432
+ ; GLOBAL-NEXT: fmr 1, 0
433
+ ; GLOBAL-NEXT: blr
434
+ %rt = call fast float @llvm.sqrt.f32 (float %x )
435
+ ret float %rt
436
+ }
437
+
438
+ ; As above, the call is fully 'fast', so approximation is allowed; this variant uses attribute group #1 ("denormal-fp-math"="preserve-sign,preserve-sign").
439
+
440
+ ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
441
+ ; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
442
+ ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
443
+
444
+ ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
445
+ ; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
446
+ ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
447
+
448
+ define float @sqrt_fast_preserve_sign (float %x ) #1 {
449
+ ; FMF-LABEL: sqrt_fast_preserve_sign:
450
+ ; FMF: # %bb.0:
451
+ ; FMF-NEXT: xxlxor 0, 0, 0
452
+ ; FMF-NEXT: fcmpu 0, 1, 0
453
+ ; FMF-NEXT: beq 0, .LBB13_2
454
+ ; FMF-NEXT: # %bb.1:
455
+ ; FMF-NEXT: xsrsqrtesp 0, 1
456
+ ; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha
457
+ ; FMF-NEXT: addis 4, 2, .LCPI13_1@toc@ha
458
+ ; FMF-NEXT: lfs 2, .LCPI13_0@toc@l(3)
459
+ ; FMF-NEXT: lfs 3, .LCPI13_1@toc@l(4)
460
+ ; FMF-NEXT: xsmulsp 1, 1, 0
461
+ ; FMF-NEXT: xsmaddasp 2, 1, 0
462
+ ; FMF-NEXT: xsmulsp 0, 1, 3
463
+ ; FMF-NEXT: xsmulsp 0, 0, 2
464
+ ; FMF-NEXT: .LBB13_2:
465
+ ; FMF-NEXT: fmr 1, 0
466
+ ; FMF-NEXT: blr
467
+ ;
468
+ ; GLOBAL-LABEL: sqrt_fast_preserve_sign:
469
+ ; GLOBAL: # %bb.0:
470
+ ; GLOBAL-NEXT: xxlxor 0, 0, 0
471
+ ; GLOBAL-NEXT: fcmpu 0, 1, 0
472
+ ; GLOBAL-NEXT: beq 0, .LBB13_2
473
+ ; GLOBAL-NEXT: # %bb.1:
474
+ ; GLOBAL-NEXT: xsrsqrtesp 0, 1
475
+ ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
476
+ ; GLOBAL-NEXT: addis 4, 2, .LCPI13_1@toc@ha
477
+ ; GLOBAL-NEXT: lfs 2, .LCPI13_0@toc@l(3)
478
+ ; GLOBAL-NEXT: lfs 3, .LCPI13_1@toc@l(4)
479
+ ; GLOBAL-NEXT: xsmulsp 1, 1, 0
480
+ ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
481
+ ; GLOBAL-NEXT: xsmulsp 0, 1, 3
482
+ ; GLOBAL-NEXT: xsmulsp 0, 0, 2
483
+ ; GLOBAL-NEXT: .LBB13_2:
484
+ ; GLOBAL-NEXT: fmr 1, 0
370
485
; GLOBAL-NEXT: blr
371
486
%rt = call fast float @llvm.sqrt.f32 (float %x )
372
487
ret float %rt
@@ -387,21 +502,21 @@ define double @fcmp_nnan(double %a, double %y, double %z) {
387
502
; FMF: # %bb.0:
388
503
; FMF-NEXT: xxlxor 0, 0, 0
389
504
; FMF-NEXT: xscmpudp 0, 1, 0
390
- ; FMF-NEXT: blt 0, .LBB12_2
505
+ ; FMF-NEXT: blt 0, .LBB14_2
391
506
; FMF-NEXT: # %bb.1:
392
507
; FMF-NEXT: fmr 3, 2
393
- ; FMF-NEXT: .LBB12_2 :
508
+ ; FMF-NEXT: .LBB14_2:
394
509
; FMF-NEXT: fmr 1, 3
395
510
; FMF-NEXT: blr
396
511
;
397
512
; GLOBAL-LABEL: fcmp_nnan:
398
513
; GLOBAL: # %bb.0:
399
514
; GLOBAL-NEXT: xxlxor 0, 0, 0
400
515
; GLOBAL-NEXT: xscmpudp 0, 1, 0
401
- ; GLOBAL-NEXT: blt 0, .LBB12_2
516
+ ; GLOBAL-NEXT: blt 0, .LBB14_2
402
517
; GLOBAL-NEXT: # %bb.1:
403
518
; GLOBAL-NEXT: fmr 3, 2
404
- ; GLOBAL-NEXT: .LBB12_2 :
519
+ ; GLOBAL-NEXT: .LBB14_2:
405
520
; GLOBAL-NEXT: fmr 1, 3
406
521
; GLOBAL-NEXT: blr
407
522
%cmp = fcmp nnan ult double %a, 0.0
@@ -477,3 +592,5 @@ define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
477
592
ret float %add
478
593
}
479
594
595
+ attributes #0 = { "denormal-fp-math" ="ieee,ieee" }
596
+ attributes #1 = { "denormal-fp-math" ="preserve-sign,preserve-sign" }
0 commit comments